From 8a644b459e019d1f2a20f5dc436ea7d461d3cac8 Mon Sep 17 00:00:00 2001
From: Arthur Cohen <arthur.cohen@embecosm.com>
Date: Thu, 9 Nov 2023 18:32:52 +0100
Subject: [PATCH 01/13] fmt: Start working on format_args!() parser

This commit adds a base class for parsing the various constructs of a
Rust format string, according to the grammar in the reference:

https://doc.rust-lang.org/std/fmt/index.html#syntax

gcc/rust/ChangeLog:

	* Make-lang.in: Compile rust-fmt object.
	* ast/rust-fmt.cc: New file.
	* ast/rust-fmt.h: New file.
---
 gcc/rust/Make-lang.in    |   1 +
 gcc/rust/ast/rust-fmt.cc |  96 ++++++++++++++++++++++++++++
 gcc/rust/ast/rust-fmt.h  | 133 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 230 insertions(+)
 create mode 100644 gcc/rust/ast/rust-fmt.cc
 create mode 100644 gcc/rust/ast/rust-fmt.h

diff --git a/gcc/rust/Make-lang.in b/gcc/rust/Make-lang.in
index 4d6460187924..6696b471d476 100644
--- a/gcc/rust/Make-lang.in
+++ b/gcc/rust/Make-lang.in
@@ -100,6 +100,7 @@ GRS_OBJS = \
     rust/rust-proc-macro-invoc-lexer.o \
     rust/rust-macro-substitute-ctx.o \
     rust/rust-macro-builtins.o \
+	rust/rust-fmt.o \
     rust/rust-hir.o \
     rust/rust-hir-map.o \
     rust/rust-attributes.o \
diff --git a/gcc/rust/ast/rust-fmt.cc b/gcc/rust/ast/rust-fmt.cc
new file mode 100644
index 000000000000..9f9ba48f0c3a
--- /dev/null
+++ b/gcc/rust/ast/rust-fmt.cc
@@ -0,0 +1,96 @@
+// Copyright (C) 2020-2023 Free Software Foundation, Inc.
+
+// This file is part of GCC.
+
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with GCC; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#include "rust-fmt.h"
+
+namespace Rust {
+tl::expected<Fmt, Fmt::Error>
+Fmt::parse_fmt_string (Fmt::Input input)
+{
+  return Fmt ();
+}
+
+tl::expected<Fmt::Result<tl::optional<Fmt::Format>>, Fmt::Error>
+Fmt::maybe_format (Fmt::Input input)
+{
+  tl::optional<Fmt::Format> none = tl::nullopt;
+
+  return Fmt::Result (input, none);
+}
+
+tl::expected<Fmt::Result<Fmt::Format>, Fmt::Error>
+Fmt::format (Input input)
+{
+  return Fmt::Result (input, Format ());
+}
+
+tl::expected<Fmt::Result<Fmt::Argument>, Fmt::Error>
+Fmt::argument (Input input)
+{
+  return Fmt::Result (input, Argument ());
+}
+
+tl::expected<Fmt::Result<Fmt::FormatSpec>, Fmt::Error>
+Fmt::format_spec (Input input)
+{
+  return Fmt::Result (input, FormatSpec ());
+}
+
+tl::expected<Fmt::Result<Fmt::Fill>, Fmt::Error>
+Fmt::fill (Input input)
+{
+  return Fmt::Result (input, Fill ());
+}
+
+tl::expected<Fmt::Result<Fmt::Align>, Fmt::Error>
+Fmt::align (Input input)
+{
+  switch (input[0])
+    {
+    case '<':
+      return Fmt::Result (input.substr (1), Align::Left);
+    case '^':
+      return Fmt::Result (input.substr (1), Align::Top);
+    case '>':
+      return Fmt::Result (input.substr (1), Align::Right);
+    default:
+      // TODO: Store the character here
+      // TODO: Can we have proper error locations?
+      // TODO: Maybe we should use a Rust::Literal string instead of a string
+      return tl::make_unexpected (Error::Align);
+    }
+}
+
+tl::expected<Fmt::Result<Fmt::Sign>, Fmt::Error>
+Fmt::sign (Input input)
+{
+  switch (input[0])
+    {
+    case '+':
+      return Fmt::Result (input.substr (1), Sign::Plus);
+    case '-':
+      return Fmt::Result (input.substr (1), Sign::Minus);
+    default:
+      // TODO: Store the character here
+      // TODO: Can we have proper error locations?
+      // TODO: Maybe we should use a Rust::Literal string instead of a string
+      return tl::make_unexpected (Error::Sign);
+    }
+}
+
+} // namespace Rust
diff --git a/gcc/rust/ast/rust-fmt.h b/gcc/rust/ast/rust-fmt.h
new file mode 100644
index 000000000000..f3dd53da9791
--- /dev/null
+++ b/gcc/rust/ast/rust-fmt.h
@@ -0,0 +1,133 @@
+// Copyright (C) 2020-2023 Free Software Foundation, Inc.
+
+// This file is part of GCC.
+
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with GCC; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#ifndef RUST_FMT_H
+#define RUST_FMT_H
+
+#include "expected.h"
+#include "optional.h"
+#include "rust-ast.h"
+#include "rust-system.h"
+
+namespace Rust {
+
+/**
+ * This class implements the parsing of Rust format strings according to the
+ * grammar here: https://doc.rust-lang.org/std/fmt/index.html#syntax
+ */
+// TODO: Are there features that are only present in specific Rust editions?
+class Fmt
+{
+public:
+  // TODO: Keep location information
+  // TODO: Switch to a Rust::AST::Literal here
+  using Input = std::string;
+
+  enum class Error
+  {
+    Align,
+    Sign,
+  };
+
+  template <typename T> class Result
+  {
+  public:
+    explicit Result (Input remaining_input, T result)
+      : remaining_input (remaining_input), result (result)
+    {}
+
+  private:
+    Input remaining_input;
+    T result;
+  };
+
+  // FIXME: Do not use an owned string here
+  static tl::expected<Fmt, Fmt::Error> parse_fmt_string (Input input);
+
+private:
+  // the parse functions should return the remaining input as well as the
+  // expected node let's look at nom
+  // TODO: no string view :( use an owned string for now?
+
+  template <typename T> struct ParseResult
+  {
+    tl::expected<Result<T>, Error> inner;
+
+    ParseResult (tl::expected<Result<T>, Error> inner) : inner (inner) {}
+    ParseResult operator= (tl::expected<Result<T>, Error> inner)
+    {
+      return ParseResult (inner);
+    }
+
+    Input remaining_input () { return inner->remaining_input; }
+    T value () { return inner->value; }
+  };
+
+  struct Format
+  {
+  };
+
+  struct Argument
+  {
+    enum struct Kind
+    {
+      Integer,
+      Identifier,
+    } kind;
+
+    int integer;
+    Identifier identifier;
+  };
+
+  struct FormatSpec
+  {
+  };
+
+  struct Fill
+  {
+    char to_fill;
+  };
+
+  enum class Align
+  {
+    Left,
+    Top,
+    Right
+  };
+
+  enum class Sign
+  {
+    Plus,
+    Minus
+  };
+
+  // let's do one function per rule in the BNF
+  static tl::expected<Result<std::string>, Error> text (Input input);
+  static tl::expected<Result<tl::optional<Format>>, Error>
+  maybe_format (Input input);
+  static tl::expected<Result<Format>, Error> format (Input input);
+  static tl::expected<Result<Argument>, Error> argument (Input input);
+  static tl::expected<Result<FormatSpec>, Error> format_spec (Input input);
+  static tl::expected<Result<Fill>, Error> fill (Input input);
+  static tl::expected<Result<Align>, Error> align (Input input);
+  static tl::expected<Result<Sign>, Error> sign (Input input);
+};
+
+} // namespace Rust
+
+#endif // ! RUST_FMT_H

From 7f587c753caf189a16b8c3ad8fdef496e925badf Mon Sep 17 00:00:00 2001
From: Arthur Cohen <arthur.cohen@embecosm.com>
Date: Thu, 30 Nov 2023 14:11:41 +0100
Subject: [PATCH 02/13] libgrust: Add format_parser library

Compile libformat_parser and link to it.

gcc/rust/ChangeLog:

	* Make-lang.in: Compile libformat_parser.
	* ast/rust-fmt.cc: New FFI definitions.
	* ast/rust-fmt.h: Likewise.
	* expand/rust-macro-builtins.cc (MacroBuiltin::format_args_handler): Call
	into libformat_parser.
	* expand/rust-macro-builtins.h: Define format_args!() handler proper.

libgrust/ChangeLog:

	* libformat_parser/Cargo.lock: New file.
	* libformat_parser/Cargo.toml: New file.
	* libformat_parser/generic_format_parser/Cargo.toml: New file.
	* libformat_parser/generic_format_parser/src/lib.rs: New file.
	* libformat_parser/src/bin.rs: New file.
	* libformat_parser/src/lib.rs: New file.
---
 gcc/rust/Make-lang.in                         |   15 +-
 gcc/rust/ast/rust-fmt.cc                      |   77 +-
 gcc/rust/ast/rust-fmt.h                       |  189 +--
 gcc/rust/expand/rust-macro-builtins.cc        |   12 +-
 gcc/rust/expand/rust-macro-builtins.h         |    3 +
 libgrust/libformat_parser/Cargo.lock          |   30 +
 libgrust/libformat_parser/Cargo.toml          |   21 +
 .../generic_format_parser/Cargo.toml          |    9 +
 .../generic_format_parser/src/lib.rs          | 1102 +++++++++++++++++
 libgrust/libformat_parser/src/bin.rs          |    7 +
 libgrust/libformat_parser/src/lib.rs          |   41 +
 11 files changed, 1351 insertions(+), 155 deletions(-)
 create mode 100644 libgrust/libformat_parser/Cargo.lock
 create mode 100644 libgrust/libformat_parser/Cargo.toml
 create mode 100644 libgrust/libformat_parser/generic_format_parser/Cargo.toml
 create mode 100644 libgrust/libformat_parser/generic_format_parser/src/lib.rs
 create mode 100644 libgrust/libformat_parser/src/bin.rs
 create mode 100644 libgrust/libformat_parser/src/lib.rs

diff --git a/gcc/rust/Make-lang.in b/gcc/rust/Make-lang.in
index 6696b471d476..7c8ab6e78464 100644
--- a/gcc/rust/Make-lang.in
+++ b/gcc/rust/Make-lang.in
@@ -54,6 +54,8 @@ GCCRS_D_OBJS = \
    rust/rustspec.o \
    $(END)
 
+LIBS += -ldl -lpthread
+
 gccrs$(exeext): $(GCCRS_D_OBJS) $(EXTRA_GCC_OBJS) libcommon-target.a $(LIBDEPS)
 	+$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \
 	  $(GCCRS_D_OBJS) $(EXTRA_GCC_OBJS) libcommon-target.a \
@@ -209,14 +211,14 @@ RUST_ALL_OBJS = $(GRS_OBJS) $(RUST_TARGET_OBJS)
 
 rust_OBJS = $(RUST_ALL_OBJS) rust/rustspec.o
 
-RUST_LDFLAGS = $(LDFLAGS) -L./../libgrust/libproc_macro_internal
-RUST_LIBDEPS = $(LIBDEPS) ../libgrust/libproc_macro_internal/libproc_macro_internal.a
+RUST_LDFLAGS = $(LDFLAGS) -L./../libgrust/libproc_macro_internal -L./../libgrust/librustc_format_parser/
+RUST_LIBDEPS = $(LIBDEPS) ../libgrust/libproc_macro_internal/libproc_macro_internal.a rust/libformat_parser.a
 
 # The compiler itself is called crab1
 crab1$(exeext): $(RUST_ALL_OBJS) attribs.o $(BACKEND) $(RUST_LIBDEPS) $(rust.prev)
 	@$(call LINK_PROGRESS,$(INDEX.rust),start)
 	+$(LLINKER) $(ALL_LINKERFLAGS) $(RUST_LDFLAGS) -o $@ \
-	      $(RUST_ALL_OBJS) attribs.o $(BACKEND) $(LIBS) ../libgrust/libproc_macro_internal/libproc_macro_internal.a $(BACKENDLIBS)
+	      $(RUST_ALL_OBJS) attribs.o $(BACKEND) $(LIBS) ../libgrust/libproc_macro_internal/libproc_macro_internal.a rust/libformat_parser.a $(BACKENDLIBS)
 	@$(call LINK_PROGRESS,$(INDEX.rust),end)
 
 # Build hooks.
@@ -402,6 +404,13 @@ rust/%.o: rust/lex/%.cc
 	$(COMPILE) $(RUST_CXXFLAGS) $(RUST_INCLUDES) $<
 	$(POSTCOMPILE)
 
+%.toml: 
+	echo $@
+
+rust/libformat_parser.a: $(srcdir)/../libgrust/libformat_parser/Cargo.toml $(wildcard $(srcdir)/../libgrust/libformat_parser/src/*.rs)
+	cargo build --manifest-path $(srcdir)/../libgrust/libformat_parser/Cargo.toml --release # FIXME: Not always release, right?
+	cp $(srcdir)/../libgrust/libformat_parser/target/release/liblibformat_parser.a $@
+
 # build all rust/parse files in rust folder, add cross-folder includes
 rust/%.o: rust/parse/%.cc
 	$(COMPILE) $(RUST_CXXFLAGS) $(RUST_INCLUDES) $<
diff --git a/gcc/rust/ast/rust-fmt.cc b/gcc/rust/ast/rust-fmt.cc
index 9f9ba48f0c3a..559b1c8b5795 100644
--- a/gcc/rust/ast/rust-fmt.cc
+++ b/gcc/rust/ast/rust-fmt.cc
@@ -19,78 +19,23 @@
 #include "rust-fmt.h"
 
 namespace Rust {
-tl::expected<Fmt, Fmt::Error>
-Fmt::parse_fmt_string (Fmt::Input input)
-{
-  return Fmt ();
-}
+namespace Fmt {
 
-tl::expected<Fmt::Result<tl::optional<Fmt::Format>>, Fmt::Error>
-Fmt::maybe_format (Fmt::Input input)
+Pieces
+Pieces::collect (const std::string &to_parse)
 {
-  tl::optional<Fmt::Format> none = tl::nullopt;
+  auto piece_slice = collect_pieces (to_parse.c_str ());
 
-  return Fmt::Result (input, none);
-}
+  rust_debug ("[ARTHUR] %p, %lu", (void *) piece_slice.ptr, piece_slice.len);
 
-tl::expected<Fmt::Result<Fmt::Format>, Fmt::Error>
-Fmt::format (Input input)
-{
-  return Fmt::Result (input, Format ());
-}
+  // this performs multiple copies, can we avoid them maybe?
+  auto pieces
+    = std::vector (piece_slice.ptr, piece_slice.ptr + piece_slice.len);
 
-tl::expected<Fmt::Result<Fmt::Argument>, Fmt::Error>
-Fmt::argument (Input input)
-{
-  return Fmt::Result (input, Argument ());
-}
+  rust_debug ("[ARTHUR] %p, %lu", (void *) pieces.data (), pieces.size ());
 
-tl::expected<Fmt::Result<Fmt::FormatSpec>, Fmt::Error>
-Fmt::format_spec (Input input)
-{
-  return Fmt::Result (input, FormatSpec ());
-}
-
-tl::expected<Fmt::Result<Fmt::Fill>, Fmt::Error>
-Fmt::fill (Input input)
-{
-  return Fmt::Result (input, Fill ());
-}
-
-tl::expected<Fmt::Result<Fmt::Align>, Fmt::Error>
-Fmt::align (Input input)
-{
-  switch (input[0])
-    {
-    case '<':
-      return Fmt::Result (input.substr (1), Align::Left);
-    case '^':
-      return Fmt::Result (input.substr (1), Align::Top);
-    case '>':
-      return Fmt::Result (input.substr (1), Align::Right);
-    default:
-      // TODO: Store the character here
-      // TODO: Can we have proper error locations?
-      // TODO: Maybe we should use a Rust::Literal string instead of a string
-      return tl::make_unexpected (Error::Align);
-    }
-}
-
-tl::expected<Fmt::Result<Fmt::Sign>, Fmt::Error>
-Fmt::sign (Input input)
-{
-  switch (input[0])
-    {
-    case '+':
-      return Fmt::Result (input.substr (1), Sign::Plus);
-    case '-':
-      return Fmt::Result (input.substr (1), Sign::Minus);
-    default:
-      // TODO: Store the character here
-      // TODO: Can we have proper error locations?
-      // TODO: Maybe we should use a Rust::Literal string instead of a string
-      return tl::make_unexpected (Error::Sign);
-    }
+  return Pieces{};
 }
 
+} // namespace Fmt
 } // namespace Rust
diff --git a/gcc/rust/ast/rust-fmt.h b/gcc/rust/ast/rust-fmt.h
index f3dd53da9791..0050977358f1 100644
--- a/gcc/rust/ast/rust-fmt.h
+++ b/gcc/rust/ast/rust-fmt.h
@@ -19,115 +19,134 @@
 #ifndef RUST_FMT_H
 #define RUST_FMT_H
 
-#include "expected.h"
-#include "optional.h"
-#include "rust-ast.h"
+#include "rust-diagnostics.h"
 #include "rust-system.h"
 
 namespace Rust {
+namespace Fmt {
 
-/**
- * This class implements the parsing of Rust format strings according to the
- * grammar here: https://doc.rust-lang.org/std/fmt/index.html#syntax
- */
-// TODO: Are there features that are only present in specific Rust editions?
-class Fmt
+struct RustHamster
 {
-public:
-  // TODO: Keep location information
-  // TODO: Switch to a Rust::AST::Literal here
-  using Input = std::string;
+  // hehe
+};
 
-  enum class Error
-  {
-    Align,
-    Sign,
-  };
+struct InnerSpan
+{
+};
 
-  template <typename T> class Result
+struct Count
+{
+  enum class Kind
+  {
+    Is,
+    IsName,
+    IsParam,
+    IsStar,
+    Implied
+  } kind;
+
+  union
   {
-  public:
-    explicit Result (Input remaining_input, T result)
-      : remaining_input (remaining_input), result (result)
-    {}
+    size_t is;
+    std::pair<RustHamster, InnerSpan> is_name;
+    size_t is_param;
+    size_t is_star;
+  } data;
+};
 
-  private:
-    Input remaining_input;
-    T result;
-  };
+struct DebugHex
+{
+};
 
-  // FIXME: Do not use an owned string here
-  static tl::expected<Fmt, Fmt::Error> parse_fmt_string (Input input);
+struct Sign
+{
+};
 
-private:
-  // the parse functions should return the remaining input as well as the
-  // expected node let's look at nom
-  // TODO: no string view :( use an owned string for now?
+struct Alignment
+{
+};
 
-  template <typename T> struct ParseResult
-  {
-    tl::expected<Result<T>, Error> inner;
+struct RustString
+{
+  // hehe
+};
 
-    ParseResult (tl::expected<Result<T>, Error> inner) : inner (inner) {}
-    ParseResult operator= (tl::expected<Result<T>, Error> inner)
-    {
-      return ParseResult (inner);
-    }
+struct Position
+{
+};
 
-    Input remaining_input () { return inner->remaining_input; }
-    T value () { return inner->value; }
-  };
+struct FormatSpec
+{
+  /// Optionally specified character to fill alignment with.
+  tl::optional<char /* FIXME: This is a Rust char, not a C++ char - use an uint32_t instead?  */> fill;
+  /// Span of the optionally specified fill character.
+  tl::optional<InnerSpan> fill_span;
+  /// Optionally specified alignment.
+  Alignment align;
+  /// The `+` or `-` flag.
+  tl::optional<Sign> sign;
+  /// The `#` flag.
+  bool alternate;
+  /// The `0` flag.
+  bool zero_pad;
+  /// The `x` or `X` flag. (Only for `Debug`.)
+  tl::optional<DebugHex> debug_hex;
+  /// The integer precision to use.
+  // Count <'a> precision;
+  /// The span of the precision formatting flag (for diagnostics).
+  tl::optional<InnerSpan> precision_span;
+  /// The string width requested for the resulting format.
+  // Count <'a> width;
+  /// The span of the width formatting flag (for diagnostics).
+  tl::optional<InnerSpan> width_span;
+  /// The descriptor string representing the name of the format desired for
+  /// this argument, this can be empty or any number of characters, although
+  /// it is required to be one word.
+  RustHamster ty;
+  // &'a str ty;
+  /// The span of the descriptor string (for diagnostics).
+  tl::optional<InnerSpan> ty_span;
+};
 
-  struct Format
-  {
-  };
+struct Argument
+{
+  Position position;
+  InnerSpan inner_span;
+  FormatSpec format;
+};
 
-  struct Argument
+struct Piece
+{
+  enum class Kind
   {
-    enum struct Kind
-    {
-      Integer,
-      Identifier,
-    } kind;
+    String,
+    NextArgument
+  } kind;
 
-    int integer;
-    Identifier identifier;
-  };
-
-  struct FormatSpec
+  union
   {
-  };
+    RustString string;
+    Argument *next_argument;
+  } data;
+};
 
-  struct Fill
-  {
-    char to_fill;
-  };
+struct PieceSlice
+{
+  Piece *ptr;
+  size_t len;
+};
 
-  enum class Align
-  {
-    Left,
-    Top,
-    Right
-  };
+extern "C" {
+PieceSlice
+collect_pieces (const char *);
+}
 
-  enum class Sign
-  {
-    Plus,
-    Minus
-  };
-
-  // let's do one function per rule in the BNF
-  static tl::expected<Result<std::string>, Error> text (Input input);
-  static tl::expected<Result<tl::optional<Format>>, Error>
-  maybe_format (Input input);
-  static tl::expected<Result<Format>, Error> format (Input input);
-  static tl::expected<Result<Argument>, Error> argument (Input input);
-  static tl::expected<Result<FormatSpec>, Error> format_spec (Input input);
-  static tl::expected<Result<Fill>, Error> fill (Input input);
-  static tl::expected<Result<Align>, Error> align (Input input);
-  static tl::expected<Result<Sign>, Error> sign (Input input);
+struct Pieces
+{
+  static Pieces collect (const std::string &to_parse);
 };
 
+} // namespace Fmt
 } // namespace Rust
 
 #endif // ! RUST_FMT_H
diff --git a/gcc/rust/expand/rust-macro-builtins.cc b/gcc/rust/expand/rust-macro-builtins.cc
index 71da575563db..0e57406f10f8 100644
--- a/gcc/rust/expand/rust-macro-builtins.cc
+++ b/gcc/rust/expand/rust-macro-builtins.cc
@@ -30,6 +30,7 @@
 #include "rust-parse.h"
 #include "rust-session-manager.h"
 #include "rust-attribute-values.h"
+#include "rust-fmt.h"
 
 namespace Rust {
 
@@ -89,8 +90,8 @@ std::unordered_map<std::string, AST::MacroTranscriberFunc>
     {"env", MacroBuiltin::env_handler},
     {"cfg", MacroBuiltin::cfg_handler},
     {"include", MacroBuiltin::include_handler},
+    {"format_args", MacroBuiltin::format_args_handler},
     /* Unimplemented macro builtins */
-    {"format_args", MacroBuiltin::sorry},
     {"option_env", MacroBuiltin::sorry},
     {"format_args_nl", MacroBuiltin::sorry},
     {"concat_idents", MacroBuiltin::sorry},
@@ -942,6 +943,15 @@ MacroBuiltin::stringify_handler (location_t invoc_locus,
   return AST::Fragment ({node}, std::move (token));
 }
 
+tl::optional<AST::Fragment>
+MacroBuiltin::format_args_handler (location_t invoc_locus,
+				   AST::MacroInvocData &invoc)
+{
+  Fmt::Pieces::collect ("heyo this {is} what I {} want to {3}, {parse}");
+
+  return AST::Fragment::create_empty ();
+}
+
 tl::optional<AST::Fragment>
 MacroBuiltin::sorry (location_t invoc_locus, AST::MacroInvocData &invoc)
 {
diff --git a/gcc/rust/expand/rust-macro-builtins.h b/gcc/rust/expand/rust-macro-builtins.h
index 6a84a8b86f68..f9ab3fc3698e 100644
--- a/gcc/rust/expand/rust-macro-builtins.h
+++ b/gcc/rust/expand/rust-macro-builtins.h
@@ -157,6 +157,9 @@ class MacroBuiltin
   static tl::optional<AST::Fragment> line_handler (location_t invoc_locus,
 						   AST::MacroInvocData &invoc);
 
+  static tl::optional<AST::Fragment>
+  format_args_handler (location_t invoc_locus, AST::MacroInvocData &invoc);
+
   static tl::optional<AST::Fragment> sorry (location_t invoc_locus,
 					    AST::MacroInvocData &invoc);
 
diff --git a/libgrust/libformat_parser/Cargo.lock b/libgrust/libformat_parser/Cargo.lock
new file mode 100644
index 000000000000..65e48263c71a
--- /dev/null
+++ b/libgrust/libformat_parser/Cargo.lock
@@ -0,0 +1,30 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "generic_format_parser"
+version = "0.1.0"
+dependencies = [
+ "unicode-xid",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.152"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7"
+
+[[package]]
+name = "libformat_parser"
+version = "0.1.0"
+dependencies = [
+ "generic_format_parser",
+ "libc",
+]
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
diff --git a/libgrust/libformat_parser/Cargo.toml b/libgrust/libformat_parser/Cargo.toml
new file mode 100644
index 000000000000..0fcfa3e89a4c
--- /dev/null
+++ b/libgrust/libformat_parser/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "libformat_parser"
+version = "0.1.0"
+edition = "2021"
+
+[workspace]
+
+members = [
+  "generic_format_parser",
+]
+
+[dependencies]
+libc = "0.2"
+generic_format_parser = { path = "generic_format_parser" }
+
+[lib]
+crate_type = ["staticlib", "rlib"]
+
+[[bin]]
+name = "format_parser_test"
+path = "src/bin.rs"
diff --git a/libgrust/libformat_parser/generic_format_parser/Cargo.toml b/libgrust/libformat_parser/generic_format_parser/Cargo.toml
new file mode 100644
index 000000000000..34577038cbed
--- /dev/null
+++ b/libgrust/libformat_parser/generic_format_parser/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "generic_format_parser"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+unicode-xid = "0.2.0"
diff --git a/libgrust/libformat_parser/generic_format_parser/src/lib.rs b/libgrust/libformat_parser/generic_format_parser/src/lib.rs
new file mode 100644
index 000000000000..f42c9d8dffbb
--- /dev/null
+++ b/libgrust/libformat_parser/generic_format_parser/src/lib.rs
@@ -0,0 +1,1102 @@
+//! Macro support for format strings
+//!
+//! These structures are used when parsing format strings for the compiler.
+//! Parsing does not happen at runtime: structures of `std::fmt::rt` are
+//! generated instead.
+
+#![doc(
+    html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/",
+    html_playground_url = "https://play.rust-lang.org/",
+    test(attr(deny(warnings)))
+)]
+#![deny(rustc::untranslatable_diagnostic)]
+#![deny(rustc::diagnostic_outside_of_impl)]
+// WARNING: We want to be able to build this crate with a stable compiler,
+//          so no `#![feature]` attributes should be added!
+
+#[deprecated(note = "Use a proper lexer function for this")]
+fn is_id_start(c: char) -> bool {
+    c == '_' || unicode_xid::UnicodeXID::is_xid_start(c)
+}
+
+#[deprecated(note = "Use a proper lexer function for this")]
+fn is_id_continue(c: char) -> bool {
+    unicode_xid::UnicodeXID::is_xid_continue(c)
+}
+
+// use rustc_lexer::unescape;
+pub use Alignment::*;
+pub use Count::*;
+pub use Piece::*;
+pub use Position::*;
+
+use std::iter;
+use std::str;
+use std::string;
+
+// Note: copied from rustc_span
+/// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub struct InnerSpan {
+    pub start: usize,
+    pub end: usize,
+}
+
+impl InnerSpan {
+    pub fn new(start: usize, end: usize) -> InnerSpan {
+        InnerSpan { start, end }
+    }
+}
+
+/// The location and before/after width of a character whose width has changed from its source code
+/// representation
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub struct InnerWidthMapping {
+    /// Index of the character in the source
+    pub position: usize,
+    /// The inner width in characters
+    pub before: usize,
+    /// The transformed width in characters
+    pub after: usize,
+}
+
+impl InnerWidthMapping {
+    pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
+        InnerWidthMapping {
+            position,
+            before,
+            after,
+        }
+    }
+}
+
+/// Whether the input string is a literal. If yes, it contains the inner width mappings.
+#[derive(Clone, PartialEq, Eq)]
+enum InputStringKind {
+    NotALiteral,
+    Literal {
+        width_mappings: Vec<InnerWidthMapping>,
+    },
+}
+
+/// The type of format string that we are parsing.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum ParseMode {
+    /// A normal format string as per `format_args!`.
+    Format,
+    /// An inline assembly template string for `asm!`.
+    InlineAsm,
+}
+
+#[derive(Copy, Clone)]
+struct InnerOffset(usize);
+
+impl InnerOffset {
+    fn to(self, end: InnerOffset) -> InnerSpan {
+        InnerSpan::new(self.0, end.0)
+    }
+}
+
+/// A piece is a portion of the format string which represents the next part
+/// to emit. These are emitted as a stream by the `Parser` class.
+#[derive(Clone, Debug, PartialEq)]
+pub enum Piece<'a> {
+    /// A literal string which should directly be emitted
+    String(&'a str),
+    /// This describes that formatting should process the next argument (as
+    /// specified inside) for emission.
+    NextArgument(Box<Argument<'a>>),
+}
+
+/// Representation of an argument specification.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub struct Argument<'a> {
+    /// Where to find this argument
+    pub position: Position<'a>,
+    /// The span of the position indicator. Includes any whitespace in implicit
+    /// positions (`{  }`).
+    pub position_span: InnerSpan,
+    /// How to format the argument
+    pub format: FormatSpec<'a>,
+}
+
+/// Specification for the formatting of an argument in the format string.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub struct FormatSpec<'a> {
+    /// Optionally specified character to fill alignment with.
+    pub fill: Option<char>,
+    /// Span of the optionally specified fill character.
+    pub fill_span: Option<InnerSpan>,
+    /// Optionally specified alignment.
+    pub align: Alignment,
+    /// The `+` or `-` flag.
+    pub sign: Option<Sign>,
+    /// The `#` flag.
+    pub alternate: bool,
+    /// The `0` flag.
+    pub zero_pad: bool,
+    /// The `x` or `X` flag. (Only for `Debug`.)
+    pub debug_hex: Option<DebugHex>,
+    /// The integer precision to use.
+    pub precision: Count<'a>,
+    /// The span of the precision formatting flag (for diagnostics).
+    pub precision_span: Option<InnerSpan>,
+    /// The string width requested for the resulting format.
+    pub width: Count<'a>,
+    /// The span of the width formatting flag (for diagnostics).
+    pub width_span: Option<InnerSpan>,
+    /// The descriptor string representing the name of the format desired for
+    /// this argument, this can be empty or any number of characters, although
+    /// it is required to be one word.
+    pub ty: &'a str,
+    /// The span of the descriptor string (for diagnostics).
+    pub ty_span: Option<InnerSpan>,
+}
+
+/// Enum describing where an argument for a format can be located.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum Position<'a> {
+    /// The argument is implied to be located at an index
+    ArgumentImplicitlyIs(usize),
+    /// The argument is located at a specific index given in the format,
+    ArgumentIs(usize),
+    /// The argument has a name.
+    ArgumentNamed(&'a str),
+}
+
+impl Position<'_> {
+    pub fn index(&self) -> Option<usize> {
+        match self {
+            ArgumentIs(i, ..) | ArgumentImplicitlyIs(i) => Some(*i),
+            _ => None,
+        }
+    }
+}
+
+/// Enum of alignments which are supported.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum Alignment {
+    /// The value will be aligned to the left.
+    AlignLeft,
+    /// The value will be aligned to the right.
+    AlignRight,
+    /// The value will be aligned in the center.
+    AlignCenter,
+    /// The value will take on a default alignment.
+    AlignUnknown,
+}
+
+/// Enum for the sign flags.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum Sign {
+    /// The `+` flag.
+    Plus,
+    /// The `-` flag.
+    Minus,
+}
+
+/// Enum for the debug hex flags.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum DebugHex {
+    /// The `x` flag in `{:x?}`.
+    Lower,
+    /// The `X` flag in `{:X?}`.
+    Upper,
+}
+
+/// A count is used for the precision and width parameters of an integer, and
+/// can reference either an argument or a literal integer.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum Count<'a> {
+    /// The count is specified explicitly.
+    CountIs(usize),
+    /// The count is specified by the argument with the given name.
+    CountIsName(&'a str, InnerSpan),
+    /// The count is specified by the argument at the given index.
+    CountIsParam(usize),
+    /// The count is specified by a star (like in `{:.*}`) that refers to the argument at the given index.
+    CountIsStar(usize),
+    /// The count is implied and cannot be explicitly specified.
+    CountImplied,
+}
+
+pub struct ParseError {
+    pub description: string::String,
+    pub note: Option<string::String>,
+    pub label: string::String,
+    pub span: InnerSpan,
+    pub secondary_label: Option<(string::String, InnerSpan)>,
+    pub suggestion: Suggestion,
+}
+
+pub enum Suggestion {
+    None,
+    /// Replace inline argument with positional argument:
+    /// `format!("{foo.bar}")` -> `format!("{}", foo.bar)`
+    UsePositional,
+    /// Remove `r#` from identifier:
+    /// `format!("{r#foo}")` -> `format!("{foo}")`
+    RemoveRawIdent(InnerSpan),
+}
+
+/// The parser structure for interpreting the input format string. This is
+/// modeled as an iterator over `Piece` structures to form a stream of tokens
+/// being output.
+///
+/// This is a recursive-descent parser for the sake of simplicity, and if
+/// necessary there's probably lots of room for improvement performance-wise.
+pub struct Parser<'a> {
+    mode: ParseMode,
+    input: &'a str,
+    cur: iter::Peekable<str::CharIndices<'a>>,
+    /// Error messages accumulated during parsing
+    pub errors: Vec<ParseError>,
+    /// Current position of implicit positional argument pointer
+    pub curarg: usize,
+    /// `Some(raw count)` when the string is "raw", used to position spans correctly
+    style: Option<usize>,
+    /// Start and end byte offset of every successfully parsed argument
+    pub arg_places: Vec<InnerSpan>,
+    /// Characters whose length has been changed from their in-code representation
+    width_map: Vec<InnerWidthMapping>,
+    /// Span of the last opening brace seen, used for error reporting
+    last_opening_brace: Option<InnerSpan>,
+    /// Whether the source string comes from `println!` as opposed to `format!` or `print!`
+    append_newline: bool,
+    /// Whether this formatting string was written directly in the source. This controls whether we
+    /// can use spans to refer into it and give better error messages.
+    /// N.B: This does _not_ control whether implicit argument captures can be used.
+    pub is_source_literal: bool,
+    /// Start position of the current line.
+    cur_line_start: usize,
+    /// Start and end byte offset of every line of the format string. Excludes
+    /// newline characters and leading whitespace.
+    pub line_spans: Vec<InnerSpan>,
+}
+
+impl<'a> Iterator for Parser<'a> {
+    type Item = Piece<'a>;
+
+    fn next(&mut self) -> Option<Piece<'a>> {
+        if let Some(&(pos, c)) = self.cur.peek() {
+            match c {
+                '{' => {
+                    let curr_last_brace = self.last_opening_brace;
+                    let byte_pos = self.to_span_index(pos);
+                    let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos));
+                    self.last_opening_brace = Some(byte_pos.to(lbrace_end));
+                    self.cur.next();
+                    if self.consume('{') {
+                        self.last_opening_brace = curr_last_brace;
+
+                        Some(String(self.string(pos + 1)))
+                    } else {
+                        let arg = self.argument(lbrace_end);
+                        if let Some(rbrace_pos) = self.consume_closing_brace(&arg) {
+                            if self.is_source_literal {
+                                let lbrace_byte_pos = self.to_span_index(pos);
+                                let rbrace_byte_pos = self.to_span_index(rbrace_pos);
+
+                                let width = self.to_span_width(rbrace_pos);
+
+                                self.arg_places.push(
+                                    lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)),
+                                );
+                            }
+                        } else {
+                            if let Some(&(_, maybe)) = self.cur.peek() {
+                                if maybe == '?' {
+                                    self.suggest_format();
+                                } else {
+                                    self.suggest_positional_arg_instead_of_captured_arg(arg);
+                                }
+                            }
+                        }
+                        Some(NextArgument(Box::new(arg)))
+                    }
+                }
+                '}' => {
+                    self.cur.next();
+                    if self.consume('}') {
+                        Some(String(self.string(pos + 1)))
+                    } else {
+                        let err_pos = self.to_span_index(pos);
+                        self.err_with_note(
+                            "unmatched `}` found",
+                            "unmatched `}`",
+                            "if you intended to print `}`, you can escape it using `}}`",
+                            err_pos.to(err_pos),
+                        );
+                        None
+                    }
+                }
+                _ => Some(String(self.string(pos))),
+            }
+        } else {
+            if self.is_source_literal {
+                let span = self.span(self.cur_line_start, self.input.len());
+                if self.line_spans.last() != Some(&span) {
+                    self.line_spans.push(span);
+                }
+            }
+            None
+        }
+    }
+}
+
+impl<'a> Parser<'a> {
+    /// Creates a new parser for the given format string
+    pub fn new(
+        s: &'a str,
+        style: Option<usize>,
+        snippet: Option<string::String>,
+        append_newline: bool,
+        mode: ParseMode,
+    ) -> Parser<'a> {
+        let input_string_kind = find_width_map_from_snippet(s, snippet, style);
+        let (width_map, is_source_literal) = match input_string_kind {
+            InputStringKind::Literal { width_mappings } => (width_mappings, true),
+            InputStringKind::NotALiteral => (Vec::new(), false),
+        };
+
+        Parser {
+            mode,
+            input: s,
+            cur: s.char_indices().peekable(),
+            errors: vec![],
+            curarg: 0,
+            style,
+            arg_places: vec![],
+            width_map,
+            last_opening_brace: None,
+            append_newline,
+            is_source_literal,
+            cur_line_start: 0,
+            line_spans: vec![],
+        }
+    }
+
+    /// Records a parse error with the given description and label at `span`.
+    /// The error is appended to `self.errors` without a note, secondary label
+    /// or suggestion.
+    fn err<S1: Into<string::String>, S2: Into<string::String>>(
+        &mut self,
+        description: S1,
+        label: S2,
+        span: InnerSpan,
+    ) {
+        self.errors.push(ParseError {
+            description: description.into(),
+            note: None,
+            label: label.into(),
+            span,
+            secondary_label: None,
+            suggestion: Suggestion::None,
+        });
+    }
+
+    /// Records a parse error with the given description, label and explanatory
+    /// note at `span`. The error is appended to `self.errors` without a
+    /// secondary label or suggestion.
+    fn err_with_note<
+        S1: Into<string::String>,
+        S2: Into<string::String>,
+        S3: Into<string::String>,
+    >(
+        &mut self,
+        description: S1,
+        label: S2,
+        note: S3,
+        span: InnerSpan,
+    ) {
+        self.errors.push(ParseError {
+            description: description.into(),
+            note: Some(note.into()),
+            label: label.into(),
+            span,
+            secondary_label: None,
+            suggestion: Suggestion::None,
+        });
+    }
+
+    /// Optionally consumes the specified character. If the character is not at
+    /// the current position, then the current iterator isn't moved and `false` is
+    /// returned, otherwise the character is consumed and `true` is returned.
+    fn consume(&mut self, c: char) -> bool {
+        self.consume_pos(c).is_some()
+    }
+
+    /// Optionally consumes the specified character. If the character is not at
+    /// the current position, then the current iterator isn't moved and `None` is
+    /// returned, otherwise the character is consumed and the current position is
+    /// returned.
+    fn consume_pos(&mut self, c: char) -> Option<usize> {
+        if let Some(&(pos, maybe)) = self.cur.peek() {
+            if c == maybe {
+                self.cur.next();
+                return Some(pos);
+            }
+        }
+        None
+    }
+
+    fn remap_pos(&self, mut pos: usize) -> InnerOffset {
+        for width in &self.width_map {
+            if pos > width.position {
+                pos += width.before - width.after;
+            } else if pos == width.position && width.after == 0 {
+                pos += width.before;
+            } else {
+                break;
+            }
+        }
+
+        InnerOffset(pos)
+    }
+
+    fn to_span_index(&self, pos: usize) -> InnerOffset {
+        // This handles the raw string case, the raw argument is the number of #
+        // in r###"..."### (we need to add one because of the `r`).
+        let raw = self.style.map_or(0, |raw| raw + 1);
+        let pos = self.remap_pos(pos);
+        InnerOffset(raw + pos.0 + 1)
+    }
+
+    fn to_span_width(&self, pos: usize) -> usize {
+        let pos = self.remap_pos(pos);
+        match self.width_map.iter().find(|w| w.position == pos.0) {
+            Some(w) => w.before,
+            None => 1,
+        }
+    }
+
+    fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan {
+        let start = self.to_span_index(start_pos);
+        let end = self.to_span_index(end_pos);
+        start.to(end)
+    }
+
+    /// Skips whitespace and consumes a closing `}`, returning its position. If there is
+    /// none, an error is recorded in `self.errors` and `None` is returned.
+    fn consume_closing_brace(&mut self, arg: &Argument<'_>) -> Option<usize> {
+        self.ws();
+
+        let pos;
+        let description;
+
+        if let Some(&(peek_pos, maybe)) = self.cur.peek() {
+            if maybe == '}' {
+                self.cur.next();
+                return Some(peek_pos);
+            }
+
+            pos = peek_pos;
+            description = format!("expected `'}}'`, found `{maybe:?}`");
+        } else {
+            description = "expected `'}'` but string was terminated".to_owned();
+            // point at closing `"`
+            pos = self.input.len() - if self.append_newline { 1 } else { 0 };
+        }
+
+        let pos = self.to_span_index(pos);
+
+        let label = "expected `'}'`".to_owned();
+        let (note, secondary_label) = if arg.format.fill == Some('}') {
+            (
+                Some("the character `'}'` is interpreted as a fill character because of the `:` that precedes it".to_owned()),
+                arg.format.fill_span.map(|sp| ("this is not interpreted as a formatting closing brace".to_owned(), sp)),
+            )
+        } else {
+            (
+                Some("if you intended to print `{`, you can escape it using `{{`".to_owned()),
+                self.last_opening_brace
+                    .map(|sp| ("because of this opening brace".to_owned(), sp)),
+            )
+        };
+
+        self.errors.push(ParseError {
+            description,
+            note,
+            label,
+            span: pos.to(pos),
+            secondary_label,
+            suggestion: Suggestion::None,
+        });
+
+        None
+    }
+
+    /// Consumes all whitespace characters until the first non-whitespace character
+    fn ws(&mut self) {
+        while let Some(&(_, c)) = self.cur.peek() {
+            if c.is_whitespace() {
+                self.cur.next();
+            } else {
+                break;
+            }
+        }
+    }
+
+    /// Parses all of a string which is to be considered a "raw literal" in a
+    /// format string. This is everything outside of the braces.
+    fn string(&mut self, start: usize) -> &'a str {
+        // we may not consume the character, peek the iterator
+        while let Some(&(pos, c)) = self.cur.peek() {
+            match c {
+                '{' | '}' => {
+                    return &self.input[start..pos];
+                }
+                '\n' if self.is_source_literal => {
+                    self.line_spans.push(self.span(self.cur_line_start, pos));
+                    self.cur_line_start = pos + 1;
+                    self.cur.next();
+                }
+                _ => {
+                    if self.is_source_literal && pos == self.cur_line_start && c.is_whitespace() {
+                        self.cur_line_start = pos + c.len_utf8();
+                    }
+                    self.cur.next();
+                }
+            }
+        }
+        &self.input[start..self.input.len()]
+    }
+
+    /// Parses an `Argument` structure, or what's contained within braces inside the format string.
+    fn argument(&mut self, start: InnerOffset) -> Argument<'a> {
+        let pos = self.position();
+
+        let end = self
+            .cur
+            .clone()
+            .find(|(_, ch)| !ch.is_whitespace())
+            .map_or(start, |(end, _)| self.to_span_index(end));
+        let position_span = start.to(end);
+
+        let format = match self.mode {
+            ParseMode::Format => self.format(),
+            ParseMode::InlineAsm => self.inline_asm(),
+        };
+
+        // Resolve position after parsing format spec.
+        let pos = match pos {
+            Some(position) => position,
+            None => {
+                let i = self.curarg;
+                self.curarg += 1;
+                ArgumentImplicitlyIs(i)
+            }
+        };
+
+        Argument {
+            position: pos,
+            position_span,
+            format,
+        }
+    }
+
+    /// Parses a positional argument for a format. This could either be an
+    /// integer index of an argument, a named argument, or a blank string.
+    /// Returns `Some(parsed_position)` if the position is not implicitly
+    /// consuming a macro argument, `None` if it's the case.
+    fn position(&mut self) -> Option<Position<'a>> {
+        if let Some(i) = self.integer() {
+            Some(ArgumentIs(i))
+        } else {
+            match self.cur.peek() {
+                Some(&(lo, c)) if is_id_start(c) => {
+                    let word = self.word();
+
+                    // Recover from `r#ident` in format strings.
+                    // FIXME: use a let chain
+                    if word == "r" {
+                        if let Some((pos, '#')) = self.cur.peek() {
+                            if self.input[pos + 1..]
+                                .chars()
+                                .next()
+                                .is_some_and(is_id_start)
+                            {
+                                self.cur.next();
+                                let word = self.word();
+                                let prefix_span = self.span(lo, lo + 2);
+                                let full_span = self.span(lo, lo + 2 + word.len());
+                                self.errors.insert(0, ParseError {
+                                    description: "raw identifiers are not supported".to_owned(),
+                                    note: Some("identifiers in format strings can be keywords and don't need to be prefixed with `r#`".to_string()),
+                                    label: "raw identifier used here".to_owned(),
+                                    span: full_span,
+                                    secondary_label: None,
+                                    suggestion: Suggestion::RemoveRawIdent(prefix_span),
+                                });
+                                return Some(ArgumentNamed(word));
+                            }
+                        }
+                    }
+
+                    Some(ArgumentNamed(word))
+                }
+
+                // This is an implicit position (`ArgumentImplicitlyIs`).
+                // Record the fact and do the resolution after parsing the
+                // format spec, to make things like `{:.*}` work.
+                _ => None,
+            }
+        }
+    }
+
+    fn current_pos(&mut self) -> usize {
+        if let Some(&(pos, _)) = self.cur.peek() {
+            pos
+        } else {
+            self.input.len()
+        }
+    }
+
+    /// Parses a format specifier at the current position, returning all of the
+    /// relevant information in the `FormatSpec` struct.
+    fn format(&mut self) -> FormatSpec<'a> {
+        let mut spec = FormatSpec {
+            fill: None,
+            fill_span: None,
+            align: AlignUnknown,
+            sign: None,
+            alternate: false,
+            zero_pad: false,
+            debug_hex: None,
+            precision: CountImplied,
+            precision_span: None,
+            width: CountImplied,
+            width_span: None,
+            ty: &self.input[..0],
+            ty_span: None,
+        };
+        if !self.consume(':') {
+            return spec;
+        }
+
+        // fill character
+        if let Some(&(idx, c)) = self.cur.peek() {
+            if let Some((_, '>' | '<' | '^')) = self.cur.clone().nth(1) {
+                spec.fill = Some(c);
+                spec.fill_span = Some(self.span(idx, idx + 1));
+                self.cur.next();
+            }
+        }
+        // Alignment
+        if self.consume('<') {
+            spec.align = AlignLeft;
+        } else if self.consume('>') {
+            spec.align = AlignRight;
+        } else if self.consume('^') {
+            spec.align = AlignCenter;
+        }
+        // Sign flags
+        if self.consume('+') {
+            spec.sign = Some(Sign::Plus);
+        } else if self.consume('-') {
+            spec.sign = Some(Sign::Minus);
+        }
+        // Alternate marker
+        if self.consume('#') {
+            spec.alternate = true;
+        }
+        // Width and precision
+        let mut havewidth = false;
+
+        if self.consume('0') {
+            // small ambiguity with '0$' as a format string. In theory this is a
+            // '0' flag and then an ill-formatted format string with just a '$'
+            // and no count, but this is better if we instead interpret this as
+            // no '0' flag and '0$' as the width instead.
+            if let Some(end) = self.consume_pos('$') {
+                spec.width = CountIsParam(0);
+                spec.width_span = Some(self.span(end - 1, end + 1));
+                havewidth = true;
+            } else {
+                spec.zero_pad = true;
+            }
+        }
+
+        if !havewidth {
+            let start = self.current_pos();
+            spec.width = self.count(start);
+            if spec.width != CountImplied {
+                let end = self.current_pos();
+                spec.width_span = Some(self.span(start, end));
+            }
+        }
+
+        if let Some(start) = self.consume_pos('.') {
+            if self.consume('*') {
+                // Resolve the `*` precision to `CountIsStar`.
+                // We can do this immediately as `position` is resolved later.
+                let i = self.curarg;
+                self.curarg += 1;
+                spec.precision = CountIsStar(i);
+            } else {
+                spec.precision = self.count(start + 1);
+            }
+            let end = self.current_pos();
+            spec.precision_span = Some(self.span(start, end));
+        }
+
+        let ty_span_start = self.current_pos();
+        // Optional radix followed by the actual format specifier
+        if self.consume('x') {
+            if self.consume('?') {
+                spec.debug_hex = Some(DebugHex::Lower);
+                spec.ty = "?";
+            } else {
+                spec.ty = "x";
+            }
+        } else if self.consume('X') {
+            if self.consume('?') {
+                spec.debug_hex = Some(DebugHex::Upper);
+                spec.ty = "?";
+            } else {
+                spec.ty = "X";
+            }
+        } else if self.consume('?') {
+            spec.ty = "?";
+        } else {
+            spec.ty = self.word();
+            if !spec.ty.is_empty() {
+                let ty_span_end = self.current_pos();
+                spec.ty_span = Some(self.span(ty_span_start, ty_span_end));
+            }
+        }
+        spec
+    }
+
+    /// Parses an inline assembly template modifier at the current position, returning the modifier
+    /// in the `ty` field of the `FormatSpec` struct.
+    fn inline_asm(&mut self) -> FormatSpec<'a> {
+        let mut spec = FormatSpec {
+            fill: None,
+            fill_span: None,
+            align: AlignUnknown,
+            sign: None,
+            alternate: false,
+            zero_pad: false,
+            debug_hex: None,
+            precision: CountImplied,
+            precision_span: None,
+            width: CountImplied,
+            width_span: None,
+            ty: &self.input[..0],
+            ty_span: None,
+        };
+        if !self.consume(':') {
+            return spec;
+        }
+
+        let ty_span_start = self.current_pos();
+        spec.ty = self.word();
+        if !spec.ty.is_empty() {
+            let ty_span_end = self.current_pos();
+            spec.ty_span = Some(self.span(ty_span_start, ty_span_end));
+        }
+
+        spec
+    }
+
+    /// Parses a `Count` parameter at the current position; `start` is where a named
+    /// count's span begins. This does not check for `CountIsStar` because that is
+    /// only used in precision, not width.
+    fn count(&mut self, start: usize) -> Count<'a> {
+        if let Some(i) = self.integer() {
+            if self.consume('$') {
+                CountIsParam(i)
+            } else {
+                CountIs(i)
+            }
+        } else {
+            let tmp = self.cur.clone();
+            let word = self.word();
+            if word.is_empty() {
+                self.cur = tmp;
+                CountImplied
+            } else if let Some(end) = self.consume_pos('$') {
+                let name_span = self.span(start, end);
+                CountIsName(word, name_span)
+            } else {
+                self.cur = tmp;
+                CountImplied
+            }
+        }
+    }
+
+    /// Parses a word starting at the current position. A word is an `is_id_start` character
+    /// followed by `is_id_continue` characters; a lone `_` is reported as an error.
+    fn word(&mut self) -> &'a str {
+        let start = match self.cur.peek() {
+            Some(&(pos, c)) if is_id_start(c) => {
+                self.cur.next();
+                pos
+            }
+            _ => {
+                return "";
+            }
+        };
+        let mut end = None;
+        while let Some(&(pos, c)) = self.cur.peek() {
+            if is_id_continue(c) {
+                self.cur.next();
+            } else {
+                end = Some(pos);
+                break;
+            }
+        }
+        let end = end.unwrap_or(self.input.len());
+        let word = &self.input[start..end];
+        if word == "_" {
+            self.err_with_note(
+                "invalid argument name `_`",
+                "invalid argument name",
+                "argument name cannot be a single underscore",
+                self.span(start, end),
+            );
+        }
+        word
+    }
+
+    fn integer(&mut self) -> Option<usize> {
+        let mut cur: usize = 0;
+        let mut found = false;
+        let mut overflow = false;
+        let start = self.current_pos();
+        while let Some(&(_, c)) = self.cur.peek() {
+            if let Some(i) = c.to_digit(10) {
+                let (tmp, mul_overflow) = cur.overflowing_mul(10);
+                let (tmp, add_overflow) = tmp.overflowing_add(i as usize);
+                if mul_overflow || add_overflow {
+                    overflow = true;
+                }
+                cur = tmp;
+                found = true;
+                self.cur.next();
+            } else {
+                break;
+            }
+        }
+
+        if overflow {
+            let end = self.current_pos();
+            let overflowed_int = &self.input[start..end];
+            self.err(
+                format!(
+                    "integer `{}` does not fit into the type `usize` whose range is `0..={}`",
+                    overflowed_int,
+                    usize::MAX
+                ),
+                "integer out of range for `usize`",
+                self.span(start, end),
+            );
+        }
+
+        found.then_some(cur)
+    }
+
+    fn suggest_format(&mut self) {
+        if let (Some(pos), Some(_)) = (self.consume_pos('?'), self.consume_pos(':')) {
+            let word = self.word();
+            let _end = self.current_pos();
+            let pos = self.to_span_index(pos);
+            self.errors.insert(
+                0,
+                ParseError {
+                    description: "expected format parameter to occur after `:`".to_owned(),
+                    note: Some(format!(
+                        "`?` comes after `:`, try `{}:{}` instead",
+                        word, "?"
+                    )),
+                    label: "expected `?` to occur after `:`".to_owned(),
+                    span: pos.to(pos),
+                    secondary_label: None,
+                    suggestion: Suggestion::None,
+                },
+            );
+        }
+    }
+
+    fn suggest_positional_arg_instead_of_captured_arg(&mut self, arg: Argument<'a>) {
+        if let Some(end) = self.consume_pos('.') {
+            let byte_pos = self.to_span_index(end);
+            let start = InnerOffset(byte_pos.0 + 1);
+            let field = self.argument(start);
+            // We can only parse `foo.bar` field access, any deeper nesting,
+            // or another type of expression, like method calls, are not supported
+            if !self.consume('}') {
+                return;
+            }
+            if let ArgumentNamed(_) = arg.position {
+                if let ArgumentNamed(_) = field.position {
+                    self.errors.insert(
+                        0,
+                        ParseError {
+                            description: "field access isn't supported".to_string(),
+                            note: None,
+                            label: "not supported".to_string(),
+                            span: InnerSpan::new(arg.position_span.start, field.position_span.end),
+                            secondary_label: None,
+                            suggestion: Suggestion::UsePositional,
+                        },
+                    );
+                }
+            }
+        }
+    }
+}
+
+/// Finds the indices of all characters that have been processed and differ between the actual
+/// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
+/// in order to properly synthesise the intra-string `Span`s for error diagnostics.
+// TODO: Can we give an escaped string here? probably yes - and a valid one too
+fn find_width_map_from_snippet(
+    input: &str,
+    snippet: Option<string::String>,
+    str_style: Option<usize>,
+) -> InputStringKind {
+    let snippet = match snippet {
+        Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
+        _ => return InputStringKind::NotALiteral,
+    };
+
+    if str_style.is_some() {
+        return InputStringKind::Literal {
+            width_mappings: Vec::new(),
+        };
+    }
+
+    // Strip quotes.
+    let snippet = &snippet[1..snippet.len() - 1];
+
+    // Macros like `println` add a newline at the end. That technically doesn't make them "literals" anymore, but it's fine
+    // since we will never need to point our spans there, so we lie about it here by ignoring it.
+    // Since there might actually be newlines in the source code, we need to normalize away all trailing newlines.
+    // If we only trimmed it off the input, `format!("\n")` would cause a mismatch as here they actually match up.
+    // Alternatively, we could just count the trailing newlines and only trim one from the input if they don't match up.
+    let input_no_nl = input.trim_end_matches('\n');
+    let Some(unescaped) = unescape_string(snippet) else {
+        return InputStringKind::NotALiteral;
+    };
+
+    let unescaped_no_nl = unescaped.trim_end_matches('\n');
+
+    if unescaped_no_nl != input_no_nl {
+        // The source string that we're pointing at isn't our input, so spans pointing at it will be incorrect.
+        // This can for example happen with proc macros that respan generated literals.
+        return InputStringKind::NotALiteral;
+    }
+
+    let mut s = snippet.char_indices();
+    let mut width_mappings = vec![];
+    while let Some((pos, c)) = s.next() {
+        match (c, s.clone().next()) {
+            // skip whitespace and empty lines ending in '\\'
+            ('\\', Some((_, '\n'))) => {
+                let _ = s.next();
+                let mut width = 2;
+
+                while let Some((_, c)) = s.clone().next() {
+                    if matches!(c, ' ' | '\n' | '\t') {
+                        width += 1;
+                        let _ = s.next();
+                    } else {
+                        break;
+                    }
+                }
+
+                width_mappings.push(InnerWidthMapping::new(pos, width, 0));
+            }
+            ('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
+                width_mappings.push(InnerWidthMapping::new(pos, 2, 1));
+                let _ = s.next();
+            }
+            ('\\', Some((_, 'x'))) => {
+                // consume `\xAB` literal
+                s.nth(2);
+                width_mappings.push(InnerWidthMapping::new(pos, 4, 1));
+            }
+            ('\\', Some((_, 'u'))) => {
+                let mut width = 2;
+                let _ = s.next();
+
+                if let Some((_, next_c)) = s.next() {
+                    if next_c == '{' {
+                        // consume up to 6 hexadecimal chars
+                        let digits_len = s
+                            .clone()
+                            .take(6)
+                            .take_while(|(_, c)| c.is_digit(16))
+                            .count();
+
+                        let len_utf8 = s
+                            .as_str()
+                            .get(..digits_len)
+                            .and_then(|digits| u32::from_str_radix(digits, 16).ok())
+                            .and_then(char::from_u32)
+                            .map_or(1, char::len_utf8);
+
+                        // Skip the digits, for chars that encode to more than 1 utf-8 byte
+                        // exclude as many digits as it is greater than 1 byte
+                        //
+                        // So for a 3 byte character, exclude 2 digits
+                        let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1));
+
+                        // skip '{' and '}' also
+                        width += required_skips + 2;
+
+                        s.nth(digits_len);
+                    } else if next_c.is_digit(16) {
+                        width += 1;
+
+                        // We suggest adding `{` and `}` when appropriate, accept it here as if
+                        // it were correct
+                        let mut i = 0; // NOTE: the `i < 6` cap below is matched by `_`, so it is never enforced
+                        while let (Some((_, c)), _) = (s.next(), i < 6) {
+                            if c.is_digit(16) {
+                                width += 1;
+                            } else {
+                                break;
+                            }
+                            i += 1;
+                        }
+                    }
+                }
+
+                width_mappings.push(InnerWidthMapping::new(pos, width, 1));
+            }
+            _ => {}
+        }
+    }
+
+    InputStringKind::Literal { width_mappings }
+}
+
+// TODO: I guess we can provide an `unescape_string` function to the parser... but how do we do that?
+// Store it in the parser struct? We would need to make it FFI-aware.
+// So this is not possible, because we need `unescape_string` *before* we have a parser.
+
+fn unescape_string(string: &str) -> Option<string::String> {
+    // let mut buf = string::String::new();
+    // let mut ok = true;
+    // unescape::unescape_literal(string, unescape::Mode::Str, &mut |_, unescaped_char| {
+    //     match unescaped_char {
+    //         Ok(c) => buf.push(c),
+    //         Err(_) => ok = false,
+    //     }
+    // });
+
+    let buf = string::String::from(string);
+    let ok = true;
+
+    ok.then_some(buf)
+}
+
+// Assert a reasonable size for `Piece`
+// #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
+// rustc_index::static_assert_size!(Piece<'_>, 16);
+
+// #[cfg(test)]
+// mod tests;
\ No newline at end of file
diff --git a/libgrust/libformat_parser/src/bin.rs b/libgrust/libformat_parser/src/bin.rs
new file mode 100644
index 000000000000..4b1f903ad5fa
--- /dev/null
+++ b/libgrust/libformat_parser/src/bin.rs
@@ -0,0 +1,7 @@
+use libformat_parser::rust;
+
+fn main() {
+    dbg!(rust::collect_pieces(
+        std::env::args().nth(1).unwrap().as_str()
+    ));
+}
diff --git a/libgrust/libformat_parser/src/lib.rs b/libgrust/libformat_parser/src/lib.rs
new file mode 100644
index 000000000000..e6dc16eeb498
--- /dev/null
+++ b/libgrust/libformat_parser/src/lib.rs
@@ -0,0 +1,41 @@
+//! FFI interface for `rustc_format_parser`
+
+// what's the plan? Have a function return something that can be constructed into a vector?
+// or an iterator?
+
+use std::ffi::CStr;
+
+// TODO: Use rustc's version here #3
+use generic_format_parser::Piece;
+
+// FIXME: Rename?
+pub mod rust {
+    use generic_format_parser::{ParseMode, Parser, Piece};
+
+    pub fn collect_pieces(input: &str) -> Vec<Piece<'_>> {
+        // let parser = Parser::new();
+        let parser = Parser::new(input, None, None, true, ParseMode::Format);
+
+        parser.into_iter().collect()
+    }
+}
+
+#[repr(C)]
+pub struct PieceSlice {
+    base_ptr: *const Piece<'static /* FIXME: That's wrong */>,
+    len: usize,
+}
+
+#[no_mangle]
+pub extern "C" fn collect_pieces(input: *const libc::c_char) -> PieceSlice {
+    // FIXME: Add comment
+    let str = unsafe { CStr::from_ptr(input) };
+
+    // FIXME: No unwrap
+    let pieces = rust::collect_pieces(str.to_str().unwrap());
+
+    PieceSlice {
+        base_ptr: pieces.as_ptr(),
+        len: pieces.len(),
+    }
+}

From af42047389d6958d84082d981febfba930d868d3 Mon Sep 17 00:00:00 2001
From: Arthur Cohen <arthur.cohen@embecosm.com>
Date: Mon, 29 Jan 2024 16:13:24 +0100
Subject: [PATCH 03/13] libformat_parser: Add FFI safe interface

libgrust/ChangeLog:

	* libformat_parser/generic_format_parser/src/lib.rs: Add missing
	newline at end of file.
	* libformat_parser/src/lib.rs: Add base for FFI interface.
---
 .../generic_format_parser/src/lib.rs          |   2 +-
 libgrust/libformat_parser/src/lib.rs          | 301 +++++++++++++++++-
 2 files changed, 298 insertions(+), 5 deletions(-)

diff --git a/libgrust/libformat_parser/generic_format_parser/src/lib.rs b/libgrust/libformat_parser/generic_format_parser/src/lib.rs
index f42c9d8dffbb..87a20dc18c56 100644
--- a/libgrust/libformat_parser/generic_format_parser/src/lib.rs
+++ b/libgrust/libformat_parser/generic_format_parser/src/lib.rs
@@ -1099,4 +1099,4 @@ fn unescape_string(string: &str) -> Option<string::String> {
 // rustc_index::static_assert_size!(Piece<'_>, 16);
 
 // #[cfg(test)]
-// mod tests;
\ No newline at end of file
+// mod tests;
diff --git a/libgrust/libformat_parser/src/lib.rs b/libgrust/libformat_parser/src/lib.rs
index e6dc16eeb498..49821e7cd2f4 100644
--- a/libgrust/libformat_parser/src/lib.rs
+++ b/libgrust/libformat_parser/src/lib.rs
@@ -5,8 +5,298 @@
 
 use std::ffi::CStr;
 
-// TODO: Use rustc's version here #3
-use generic_format_parser::Piece;
+mod ffi {
+    use std::ops::Deref;
+
+    // Note: copied from rustc_span
+    /// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
+    #[derive(Copy, Clone, PartialEq, Eq, Debug)]
+    #[repr(C)]
+    pub struct InnerSpan {
+        pub start: usize,
+        pub end: usize,
+    }
+
+    // impl InnerSpan {
+    //     pub fn new(start: usize, end: usize) -> InnerSpan {
+    //         InnerSpan { start, end }
+    //     }
+    // }
+
+    /// The location and before/after width of a character whose width has changed from its source code
+    /// representation
+    #[derive(Copy, Clone, PartialEq, Eq)]
+    #[repr(C)]
+    pub struct InnerWidthMapping {
+        /// Index of the character in the source
+        pub position: usize,
+        /// The inner width in characters
+        pub before: usize,
+        /// The transformed width in characters
+        pub after: usize,
+    }
+
+    // impl InnerWidthMapping {
+    //     pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
+    //         InnerWidthMapping {
+    //             position,
+    //             before,
+    //             after,
+    //         }
+    //     }
+    // }
+
+    /// Whether the input string is a literal. If yes, it contains the inner width mappings.
+    #[derive(Clone, PartialEq, Eq)]
+    #[repr(C)]
+    enum InputStringKind {
+        NotALiteral,
+        Literal {
+            width_mappings: Vec<InnerWidthMapping>,
+        },
+    }
+
+    /// The type of format string that we are parsing.
+    #[derive(Copy, Clone, Debug, Eq, PartialEq)]
+    #[repr(C)]
+    pub enum ParseMode {
+        /// A normal format string as per `format_args!`.
+        Format,
+        /// An inline assembly template string for `asm!`.
+        InlineAsm,
+    }
+
+    #[derive(Copy, Clone)]
+    #[repr(C)]
+    struct InnerOffset(usize);
+
+    /// A piece is a portion of the format string which represents the next part
+    /// to emit. These are emitted as a stream by the `Parser` class.
+    #[derive(Clone, Debug, PartialEq)]
+    #[repr(C)]
+    pub enum Piece<'a> {
+        /// A literal string which should directly be emitted
+        String(&'a str),
+        /// This describes that formatting should process the next argument (as
+        /// specified inside) for emission.
+        NextArgument(Box<Argument<'a>>),
+    }
+
+    /// Representation of an argument specification.
+    #[derive(Copy, Clone, Debug, PartialEq)]
+    #[repr(C)]
+    pub struct Argument<'a> {
+        /// Where to find this argument
+        pub position: Position<'a>,
+        /// The span of the position indicator. Includes any whitespace in implicit
+        /// positions (`{  }`).
+        pub position_span: InnerSpan,
+        /// How to format the argument
+        pub format: FormatSpec<'a>,
+    }
+
+    /// Specification for the formatting of an argument in the format string.
+    #[derive(Copy, Clone, Debug, PartialEq)]
+    #[repr(C)]
+    pub struct FormatSpec<'a> {
+        /// Optionally specified character to fill alignment with.
+        pub fill: Option<char>,
+        /// Span of the optionally specified fill character.
+        pub fill_span: Option<InnerSpan>,
+        /// Optionally specified alignment.
+        pub align: Alignment,
+        /// The `+` or `-` flag.
+        pub sign: Option<Sign>,
+        /// The `#` flag.
+        pub alternate: bool,
+        /// The `0` flag.
+        pub zero_pad: bool,
+        /// The `x` or `X` flag. (Only for `Debug`.)
+        pub debug_hex: Option<DebugHex>,
+        /// The integer precision to use.
+        pub precision: Count<'a>,
+        /// The span of the precision formatting flag (for diagnostics).
+        pub precision_span: Option<InnerSpan>,
+        /// The string width requested for the resulting format.
+        pub width: Count<'a>,
+        /// The span of the width formatting flag (for diagnostics).
+        pub width_span: Option<InnerSpan>,
+        /// The descriptor string representing the name of the format desired for
+        /// this argument, this can be empty or any number of characters, although
+        /// it is required to be one word.
+        pub ty: &'a str,
+        /// The span of the descriptor string (for diagnostics).
+        pub ty_span: Option<InnerSpan>,
+    }
+
+    /// Enum describing where an argument for a format can be located.
+    #[derive(Copy, Clone, Debug, PartialEq)]
+    #[repr(C)]
+    pub enum Position<'a> {
+        /// The argument is implied to be located at an index
+        ArgumentImplicitlyIs(usize),
+        /// The argument is located at a specific index given in the format,
+        ArgumentIs(usize),
+        /// The argument has a name.
+        ArgumentNamed(&'a str),
+    }
+
+    /// Enum of alignments which are supported.
+    #[derive(Copy, Clone, Debug, PartialEq)]
+    #[repr(C)]
+    pub enum Alignment {
+        /// The value will be aligned to the left.
+        AlignLeft,
+        /// The value will be aligned to the right.
+        AlignRight,
+        /// The value will be aligned in the center.
+        AlignCenter,
+        /// The value will take on a default alignment.
+        AlignUnknown,
+    }
+
+    /// Enum for the sign flags.
+    #[derive(Copy, Clone, Debug, PartialEq)]
+    #[repr(C)]
+    pub enum Sign {
+        /// The `+` flag.
+        Plus,
+        /// The `-` flag.
+        Minus,
+    }
+
+    /// Enum for the debug hex flags.
+    #[derive(Copy, Clone, Debug, PartialEq)]
+    #[repr(C)]
+    pub enum DebugHex {
+        /// The `x` flag in `{:x?}`.
+        Lower,
+        /// The `X` flag in `{:X?}`.
+        Upper,
+    }
+
+    /// A count is used for the precision and width parameters of an integer, and
+    /// can reference either an argument or a literal integer.
+    #[derive(Copy, Clone, Debug, PartialEq)]
+    #[repr(C)]
+    pub enum Count<'a> {
+        /// The count is specified explicitly.
+        CountIs(usize),
+        /// The count is specified by the argument with the given name.
+        CountIsName(&'a str, InnerSpan),
+        /// The count is specified by the argument at the given index.
+        CountIsParam(usize),
+        /// The count is specified by a star (like in `{:.*}`) that refers to the argument at the given index.
+        CountIsStar(usize),
+        /// The count is implied and cannot be explicitly specified.
+        CountImplied,
+    }
+
+    impl<'a> From<generic_format_parser::Piece<'a>> for Piece<'a> {
+        fn from(old: generic_format_parser::Piece<'a>) -> Self {
+            match old {
+                generic_format_parser::Piece::String(x) => Piece::String(x),
+                generic_format_parser::Piece::NextArgument(x) => {
+                    Piece::NextArgument(Box::new(Into::<Argument>::into(*x)))
+                }
+            }
+        }
+    }
+
+    impl<'a> From<generic_format_parser::Argument<'a>> for Argument<'a> {
+        fn from(old: generic_format_parser::Argument<'a>) -> Self {
+            Argument {
+                position: old.position.into(),
+                position_span: old.position_span.into(),
+                format: old.format.into(),
+            }
+        }
+    }
+
+    impl<'a> From<generic_format_parser::Position<'a>> for Position<'a> {
+        fn from(old: generic_format_parser::Position<'a>) -> Self {
+            match old {
+                generic_format_parser::Position::ArgumentImplicitlyIs(x) => {
+                    Position::ArgumentImplicitlyIs(x.into())
+                }
+                generic_format_parser::Position::ArgumentIs(x) => Position::ArgumentIs(x.into()),
+                generic_format_parser::Position::ArgumentNamed(x) => {
+                    Position::ArgumentNamed(x.into())
+                }
+            }
+        }
+    }
+
+    impl From<generic_format_parser::InnerSpan> for InnerSpan {
+        fn from(old: generic_format_parser::InnerSpan) -> Self {
+            InnerSpan {
+                start: old.start,
+                end: old.end,
+            }
+        }
+    }
+
+    impl<'a> From<generic_format_parser::FormatSpec<'a>> for FormatSpec<'a> {
+        fn from(old: generic_format_parser::FormatSpec<'a>) -> Self {
+            FormatSpec {
+                fill: old.fill,
+                fill_span: old.fill_span.map(Into::into),
+                align: old.align.into(),
+                sign: old.sign.map(Into::into),
+                alternate: old.alternate,
+                zero_pad: old.zero_pad,
+                debug_hex: old.debug_hex.map(Into::into),
+                precision: old.precision.into(),
+                precision_span: old.precision_span.map(Into::into),
+                width: old.width.into(),
+                width_span: old.width_span.map(Into::into),
+                ty: old.ty,
+                ty_span: old.ty_span.map(Into::into),
+            }
+        }
+    }
+
+    impl From<generic_format_parser::DebugHex> for DebugHex {
+        fn from(old: generic_format_parser::DebugHex) -> Self {
+            match old {
+                generic_format_parser::DebugHex::Lower => DebugHex::Lower,
+                generic_format_parser::DebugHex::Upper => DebugHex::Upper,
+            }
+        }
+    }
+
+    impl<'a> From<generic_format_parser::Count<'a>> for Count<'a> {
+        fn from(old: generic_format_parser::Count<'a>) -> Self {
+            match old {
+                generic_format_parser::Count::CountIs(x) => Count::CountIs(x),
+                generic_format_parser::Count::CountIsName(x, y) => Count::CountIsName(x, y.into()),
+                generic_format_parser::Count::CountIsParam(x) => Count::CountIsParam(x),
+                generic_format_parser::Count::CountIsStar(x) => Count::CountIsStar(x),
+                generic_format_parser::Count::CountImplied => Count::CountImplied,
+            }
+        }
+    }
+
+    impl From<generic_format_parser::Sign> for Sign {
+        fn from(old: generic_format_parser::Sign) -> Self {
+            match old {
+                generic_format_parser::Sign::Plus => Sign::Plus,
+                generic_format_parser::Sign::Minus => Sign::Minus,
+            }
+        }
+    }
+
+    impl From<generic_format_parser::Alignment> for Alignment {
+        fn from(old: generic_format_parser::Alignment) -> Self {
+            match old {
+                generic_format_parser::Alignment::AlignLeft => Alignment::AlignLeft,
+                generic_format_parser::Alignment::AlignRight => Alignment::AlignRight,
+                generic_format_parser::Alignment::AlignCenter => Alignment::AlignCenter,
+                generic_format_parser::Alignment::AlignUnknown => Alignment::AlignUnknown,
+            }
+        }
+    }
+}
 
 // FIXME: Rename?
 pub mod rust {
@@ -22,7 +312,7 @@ pub mod rust {
 
 #[repr(C)]
 pub struct PieceSlice {
-    base_ptr: *const Piece<'static /* FIXME: That's wrong */>,
+    base_ptr: *const ffi::Piece<'static /* FIXME: That's wrong */>,
     len: usize,
 }
 
@@ -32,7 +322,10 @@ pub extern "C" fn collect_pieces(input: *const libc::c_char) -> PieceSlice {
     let str = unsafe { CStr::from_ptr(input) };
 
     // FIXME: No unwrap
-    let pieces = rust::collect_pieces(str.to_str().unwrap());
+    let pieces: Vec<ffi::Piece<'_>> = rust::collect_pieces(str.to_str().unwrap())
+        .into_iter()
+        .map(Into::into)
+        .collect();
 
     PieceSlice {
         base_ptr: pieces.as_ptr(),

From 6f7a373c0727c61d06e591c33e969c7f5286b7db Mon Sep 17 00:00:00 2001
From: Arthur Cohen <arthur.cohen@embecosm.com>
Date: Mon, 29 Jan 2024 16:14:13 +0100
Subject: [PATCH 04/13] libformat_parser: Start experimenting with cbindgen

libgrust/ChangeLog:

	* libformat_parser/cbindgen.toml: New file.
	* libformat_parser/libformat-parser.h: New file.

gcc/rust/ChangeLog:

	* ast/rust-fmt.h (struct FormatSpec): Enable the Count-typed
	precision and width fields.
---
 gcc/rust/ast/rust-fmt.h                      |   4 +-
 libgrust/libformat_parser/cbindgen.toml      |   0
 libgrust/libformat_parser/libformat-parser.h | 224 +++++++++++++++++++
 3 files changed, 226 insertions(+), 2 deletions(-)
 create mode 100644 libgrust/libformat_parser/cbindgen.toml
 create mode 100644 libgrust/libformat_parser/libformat-parser.h

diff --git a/gcc/rust/ast/rust-fmt.h b/gcc/rust/ast/rust-fmt.h
index 0050977358f1..27c1c3625d3e 100644
--- a/gcc/rust/ast/rust-fmt.h
+++ b/gcc/rust/ast/rust-fmt.h
@@ -92,11 +92,11 @@ struct FormatSpec
   /// The `x` or `X` flag. (Only for `Debug`.)
   tl::optional<DebugHex> debug_hex;
   /// The integer precision to use.
-  // Count <'a> precision;
+  Count precision;
   /// The span of the precision formatting flag (for diagnostics).
   tl::optional<InnerSpan> precision_span;
   /// The string width requested for the resulting format.
-  // Count <'a> width;
+  Count width;
   /// The span of the width formatting flag (for diagnostics).
   tl::optional<InnerSpan> width_span;
   /// The descriptor string representing the name of the format desired for
diff --git a/libgrust/libformat_parser/cbindgen.toml b/libgrust/libformat_parser/cbindgen.toml
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/libgrust/libformat_parser/libformat-parser.h b/libgrust/libformat_parser/libformat-parser.h
new file mode 100644
index 000000000000..a4bc8a754944
--- /dev/null
+++ b/libgrust/libformat_parser/libformat-parser.h
@@ -0,0 +1,224 @@
+#include <cstdarg>
+#include <cstdint>
+#include <cstdlib>
+#include <ostream>
+#include <new>
+
+/// Enum of alignments which are supported.
+enum class Alignment
+{
+  /// The value will be aligned to the left.
+  AlignLeft,
+  /// The value will be aligned to the right.
+  AlignRight,
+  /// The value will be aligned in the center.
+  AlignCenter,
+  /// The value will take on a default alignment.
+  AlignUnknown,
+};
+
+/// Enum for the debug hex flags.
+enum class DebugHex
+{
+  /// The `x` flag in `{:x?}`.
+  Lower,
+  /// The `X` flag in `{:X?}`.
+  Upper,
+};
+
+/// Enum for the sign flags.
+enum class Sign
+{
+  /// The `+` flag.
+  Plus,
+  /// The `-` flag.
+  Minus,
+};
+
+template <typename T = void> struct Box;
+
+template <typename T = void> struct Option;
+
+/// Enum describing where an argument for a format can be located.
+struct Position
+{
+  enum class Tag
+  {
+    /// The argument is implied to be located at an index
+    ArgumentImplicitlyIs,
+    /// The argument is located at a specific index given in the format,
+    ArgumentIs,
+    /// The argument has a name.
+    ArgumentNamed,
+  };
+
+  struct ArgumentImplicitlyIs_Body
+  {
+    uintptr_t _0;
+  };
+
+  struct ArgumentIs_Body
+  {
+    uintptr_t _0;
+  };
+
+  struct ArgumentNamed_Body
+  {
+    const str *_0;
+  };
+
+  Tag tag;
+  union
+  {
+    ArgumentImplicitlyIs_Body argument_implicitly_is;
+    ArgumentIs_Body argument_is;
+    ArgumentNamed_Body argument_named;
+  };
+};
+
+/// Range inside of a `Span` used for diagnostics when we only have access to
+/// relative positions.
+struct InnerSpan
+{
+  uintptr_t start;
+  uintptr_t end;
+};
+
+/// A count is used for the precision and width parameters of an integer, and
+/// can reference either an argument or a literal integer.
+struct Count
+{
+  enum class Tag
+  {
+    /// The count is specified explicitly.
+    CountIs,
+    /// The count is specified by the argument with the given name.
+    CountIsName,
+    /// The count is specified by the argument at the given index.
+    CountIsParam,
+    /// The count is specified by a star (like in `{:.*}`) that refers to the
+    /// argument at the given index.
+    CountIsStar,
+    /// The count is implied and cannot be explicitly specified.
+    CountImplied,
+  };
+
+  struct CountIs_Body
+  {
+    uintptr_t _0;
+  };
+
+  struct CountIsName_Body
+  {
+    const str *_0;
+    InnerSpan _1;
+  };
+
+  struct CountIsParam_Body
+  {
+    uintptr_t _0;
+  };
+
+  struct CountIsStar_Body
+  {
+    uintptr_t _0;
+  };
+
+  Tag tag;
+  union
+  {
+    CountIs_Body count_is;
+    CountIsName_Body count_is_name;
+    CountIsParam_Body count_is_param;
+    CountIsStar_Body count_is_star;
+  };
+};
+
+/// Specification for the formatting of an argument in the format string.
+struct FormatSpec
+{
+  /// Optionally specified character to fill alignment with.
+  Option<uint32_t> fill;
+  /// Span of the optionally specified fill character.
+  Option<InnerSpan> fill_span;
+  /// Optionally specified alignment.
+  Alignment align;
+  /// The `+` or `-` flag.
+  Option<Sign> sign;
+  /// The `#` flag.
+  bool alternate;
+  /// The `0` flag.
+  bool zero_pad;
+  /// The `x` or `X` flag. (Only for `Debug`.)
+  Option<DebugHex> debug_hex;
+  /// The integer precision to use.
+  Count precision;
+  /// The span of the precision formatting flag (for diagnostics).
+  Option<InnerSpan> precision_span;
+  /// The string width requested for the resulting format.
+  Count width;
+  /// The span of the width formatting flag (for diagnostics).
+  Option<InnerSpan> width_span;
+  /// The descriptor string representing the name of the format desired for
+  /// this argument, this can be empty or any number of characters, although
+  /// it is required to be one word.
+  const str *ty;
+  /// The span of the descriptor string (for diagnostics).
+  Option<InnerSpan> ty_span;
+};
+
+/// Representation of an argument specification.
+struct Argument
+{
+  /// Where to find this argument
+  Position position;
+  /// The span of the position indicator. Includes any whitespace in implicit
+  /// positions (`{  }`).
+  InnerSpan position_span;
+  /// How to format the argument
+  FormatSpec format;
+};
+
+/// A piece is a portion of the format string which represents the next part
+/// to emit. These are emitted as a stream by the `Parser` class.
+struct Piece
+{
+  enum class Tag
+  {
+    /// A literal string which should directly be emitted
+    String,
+    /// This describes that formatting should process the next argument (as
+    /// specified inside) for emission.
+    NextArgument,
+  };
+
+  struct String_Body
+  {
+    const str *_0;
+  };
+
+  struct NextArgument_Body
+  {
+    Box<Argument> _0;
+  };
+
+  Tag tag;
+  union
+  {
+    String_Body string;
+    NextArgument_Body next_argument;
+  };
+};
+
+struct PieceSlice
+{
+  const Piece *base_ptr;
+  uintptr_t len;
+};
+
+extern "C" {
+
+PieceSlice
+collect_pieces (const char *input);
+
+} // extern "C"

From 66f80323943a3146ed4b994e339d4fb3fd5f8b40 Mon Sep 17 00:00:00 2001
From: Arthur Cohen <arthur.cohen@embecosm.com>
Date: Mon, 29 Jan 2024 16:17:00 +0100
Subject: [PATCH 05/13] git: Ignore libgrust build folders

ChangeLog:

	* .gitignore: Add libgrust target folders to the ignore list.
---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 88b8aa27a882..b1c6625d645c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -76,3 +76,4 @@ test.code-workspace
 
 gcc/rust/test3-tiny/*
 .clang-format.swap
+libgrust/*/target/

From 9fb89f05bdac84977be9779567d5ce76868c4a8b Mon Sep 17 00:00:00 2001
From: Arthur Cohen <arthur.cohen@embecosm.com>
Date: Mon, 29 Jan 2024 22:06:39 +0100
Subject: [PATCH 06/13] libformat_parser: Update header and remove old
 interface

gcc/rust/ChangeLog:

	* ast/rust-fmt.cc (Pieces::collect): Use new Pieces API.
	* ast/rust-fmt.h: Update interface with new FFI bindings.

libgrust/ChangeLog:

	* libformat_parser/src/lib.rs: Add IntoFFI trait.
	* libformat_parser/libformat-parser.h: Removed.
---
 gcc/rust/ast/rust-fmt.cc                     |  10 +-
 gcc/rust/ast/rust-fmt.h                      | 199 ++++++++++++----
 libgrust/libformat_parser/libformat-parser.h | 224 -------------------
 libgrust/libformat_parser/src/lib.rs         |  56 +++--
 4 files changed, 200 insertions(+), 289 deletions(-)
 delete mode 100644 libgrust/libformat_parser/libformat-parser.h

diff --git a/gcc/rust/ast/rust-fmt.cc b/gcc/rust/ast/rust-fmt.cc
index 559b1c8b5795..a7c4341c52db 100644
--- a/gcc/rust/ast/rust-fmt.cc
+++ b/gcc/rust/ast/rust-fmt.cc
@@ -17,6 +17,7 @@
 // <http://www.gnu.org/licenses/>.
 
 #include "rust-fmt.h"
+#include "rust-diagnostics.h"
 
 namespace Rust {
 namespace Fmt {
@@ -26,13 +27,12 @@ Pieces::collect (const std::string &to_parse)
 {
   auto piece_slice = collect_pieces (to_parse.c_str ());
 
-  rust_debug ("[ARTHUR] %p, %lu", (void *) piece_slice.ptr, piece_slice.len);
+  rust_debug ("[ARTHUR] %p, %lu", (const void *) piece_slice.base_ptr,
+	      piece_slice.len);
 
   // this performs multiple copies, can we avoid them maybe?
-  auto pieces
-    = std::vector (piece_slice.ptr, piece_slice.ptr + piece_slice.len);
-
-  rust_debug ("[ARTHUR] %p, %lu", (void *) pieces.data (), pieces.size ());
+  // auto pieces = std::vector<Piece> (piece_slice.base_ptr,
+  // 	     piece_slice.base_ptr + piece_slice.len);
 
   return Pieces{};
 }
diff --git a/gcc/rust/ast/rust-fmt.h b/gcc/rust/ast/rust-fmt.h
index 27c1c3625d3e..7ec9a2a199dd 100644
--- a/gcc/rust/ast/rust-fmt.h
+++ b/gcc/rust/ast/rust-fmt.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2020-2023 Free Software Foundation, Inc.
+// Copyright (C) 2023-2024 Free Software Foundation, Inc.
 
 // This file is part of GCC.
 
@@ -19,9 +19,10 @@
 #ifndef RUST_FMT_H
 #define RUST_FMT_H
 
-#include "rust-diagnostics.h"
 #include "rust-system.h"
 
+// FIXME: How to encode Option?
+
 namespace Rust {
 namespace Fmt {
 
@@ -30,116 +31,220 @@ struct RustHamster
   // hehe
 };
 
-struct InnerSpan
+/// Enum of alignments which are supported.
+enum class Alignment
 {
+  /// The value will be aligned to the left.
+  AlignLeft,
+  /// The value will be aligned to the right.
+  AlignRight,
+  /// The value will be aligned in the center.
+  AlignCenter,
+  /// The value will take on a default alignment.
+  AlignUnknown,
 };
 
-struct Count
+/// Enum for the debug hex flags.
+enum class DebugHex
 {
-  enum class Kind
-  {
-    Is,
-    IsName,
-    IsParam,
-    IsStar,
-    Implied
-  } kind;
-
-  union
-  {
-    size_t is;
-    std::pair<RustHamster, InnerSpan> is_name;
-    size_t is_param;
-    size_t is_star;
-  } data;
+  /// The `x` flag in `{:x?}`.
+  Lower,
+  /// The `X` flag in `{:X?}`.
+  Upper,
 };
 
-struct DebugHex
+/// Enum for the sign flags.
+enum class Sign
 {
+  /// The `+` flag.
+  Plus,
+  /// The `-` flag.
+  Minus,
 };
 
-struct Sign
+/// Enum describing where an argument for a format can be located.
+struct Position
 {
-};
+  enum class Tag
+  {
+    /// The argument is implied to be located at an index
+    ArgumentImplicitlyIs,
+    /// The argument is located at a specific index given in the format,
+    ArgumentIs,
+    /// The argument has a name.
+    ArgumentNamed,
+  };
 
-struct Alignment
-{
+  struct ArgumentImplicitlyIs_Body
+  {
+    size_t _0;
+  };
+
+  struct ArgumentIs_Body
+  {
+    size_t _0;
+  };
+
+  struct ArgumentNamed_Body
+  {
+    RustHamster _0;
+  };
+
+  Tag tag;
+  union
+  {
+    ArgumentImplicitlyIs_Body argument_implicitly_is;
+    ArgumentIs_Body argument_is;
+    ArgumentNamed_Body argument_named;
+  };
 };
 
-struct RustString
+/// Range inside of a `Span` used for diagnostics when we only have access to
+/// relative positions.
+struct InnerSpan
 {
-  // hehe
+  size_t start;
+  size_t end;
 };
 
-struct Position
+/// A count is used for the precision and width parameters of an integer, and
+/// can reference either an argument or a literal integer.
+struct Count
 {
+  enum class Tag
+  {
+    /// The count is specified explicitly.
+    CountIs,
+    /// The count is specified by the argument with the given name.
+    CountIsName,
+    /// The count is specified by the argument at the given index.
+    CountIsParam,
+    /// The count is specified by a star (like in `{:.*}`) that refers to the
+    /// argument at the given index.
+    CountIsStar,
+    /// The count is implied and cannot be explicitly specified.
+    CountImplied,
+  };
+
+  struct CountIs_Body
+  {
+    size_t _0;
+  };
+
+  struct CountIsName_Body
+  {
+    RustHamster _0;
+    InnerSpan _1;
+  };
+
+  struct CountIsParam_Body
+  {
+    size_t _0;
+  };
+
+  struct CountIsStar_Body
+  {
+    size_t _0;
+  };
+
+  Tag tag;
+  union
+  {
+    CountIs_Body count_is;
+    CountIsName_Body count_is_name;
+    CountIsParam_Body count_is_param;
+    CountIsStar_Body count_is_star;
+  };
 };
 
+/// Specification for the formatting of an argument in the format string.
 struct FormatSpec
 {
   /// Optionally specified character to fill alignment with.
-  tl::optional<char /* FIXME: This is a Rust char, not a C++ char - use an uint32_t instead?  */> fill;
+  const uint32_t *fill;
   /// Span of the optionally specified fill character.
-  tl::optional<InnerSpan> fill_span;
+  const InnerSpan *fill_span;
   /// Optionally specified alignment.
   Alignment align;
   /// The `+` or `-` flag.
-  tl::optional<Sign> sign;
+  const Sign *sign;
   /// The `#` flag.
   bool alternate;
   /// The `0` flag.
   bool zero_pad;
   /// The `x` or `X` flag. (Only for `Debug`.)
-  tl::optional<DebugHex> debug_hex;
+  const DebugHex *debug_hex;
   /// The integer precision to use.
   Count precision;
   /// The span of the precision formatting flag (for diagnostics).
-  tl::optional<InnerSpan> precision_span;
+  const InnerSpan *precision_span;
   /// The string width requested for the resulting format.
   Count width;
   /// The span of the width formatting flag (for diagnostics).
-  tl::optional<InnerSpan> width_span;
+  const InnerSpan *width_span;
   /// The descriptor string representing the name of the format desired for
   /// this argument, this can be empty or any number of characters, although
   /// it is required to be one word.
   RustHamster ty;
-  // &'a str ty;
   /// The span of the descriptor string (for diagnostics).
-  tl::optional<InnerSpan> ty_span;
+  const InnerSpan *ty_span;
 };
 
+/// Representation of an argument specification.
 struct Argument
 {
+  /// Where to find this argument
   Position position;
-  InnerSpan inner_span;
+  /// The span of the position indicator. Includes any whitespace in implicit
+  /// positions (`{  }`).
+  InnerSpan position_span;
+  /// How to format the argument
   FormatSpec format;
 };
 
+/// A piece is a portion of the format string which represents the next part
+/// to emit. These are emitted as a stream by the `Parser` class.
 struct Piece
 {
-  enum class Kind
+  enum class Tag
   {
+    /// A literal string which should directly be emitted
     String,
-    NextArgument
-  } kind;
+    /// This describes that formatting should process the next argument (as
+    /// specified inside) for emission.
+    NextArgument,
+  };
+
+  struct String_Body
+  {
+    RustHamster _0;
+  };
+
+  struct NextArgument_Body
+  {
+    const Argument *_0;
+  };
 
+  Tag tag;
   union
   {
-    RustString string;
-    Argument *next_argument;
-  } data;
+    String_Body string;
+    NextArgument_Body next_argument;
+  };
 };
 
 struct PieceSlice
 {
-  Piece *ptr;
+  const Piece *base_ptr;
   size_t len;
 };
 
 extern "C" {
+
 PieceSlice
-collect_pieces (const char *);
-}
+collect_pieces (const char *input);
+
+} // extern "C"
 
 struct Pieces
 {
@@ -149,4 +254,4 @@ struct Pieces
 } // namespace Fmt
 } // namespace Rust
 
-#endif // ! RUST_FMT_H
+#endif // !RUST_FMT_H
diff --git a/libgrust/libformat_parser/libformat-parser.h b/libgrust/libformat_parser/libformat-parser.h
deleted file mode 100644
index a4bc8a754944..000000000000
--- a/libgrust/libformat_parser/libformat-parser.h
+++ /dev/null
@@ -1,224 +0,0 @@
-#include <cstdarg>
-#include <cstdint>
-#include <cstdlib>
-#include <ostream>
-#include <new>
-
-/// Enum of alignments which are supported.
-enum class Alignment
-{
-  /// The value will be aligned to the left.
-  AlignLeft,
-  /// The value will be aligned to the right.
-  AlignRight,
-  /// The value will be aligned in the center.
-  AlignCenter,
-  /// The value will take on a default alignment.
-  AlignUnknown,
-};
-
-/// Enum for the debug hex flags.
-enum class DebugHex
-{
-  /// The `x` flag in `{:x?}`.
-  Lower,
-  /// The `X` flag in `{:X?}`.
-  Upper,
-};
-
-/// Enum for the sign flags.
-enum class Sign
-{
-  /// The `+` flag.
-  Plus,
-  /// The `-` flag.
-  Minus,
-};
-
-template <typename T = void> struct Box;
-
-template <typename T = void> struct Option;
-
-/// Enum describing where an argument for a format can be located.
-struct Position
-{
-  enum class Tag
-  {
-    /// The argument is implied to be located at an index
-    ArgumentImplicitlyIs,
-    /// The argument is located at a specific index given in the format,
-    ArgumentIs,
-    /// The argument has a name.
-    ArgumentNamed,
-  };
-
-  struct ArgumentImplicitlyIs_Body
-  {
-    uintptr_t _0;
-  };
-
-  struct ArgumentIs_Body
-  {
-    uintptr_t _0;
-  };
-
-  struct ArgumentNamed_Body
-  {
-    const str *_0;
-  };
-
-  Tag tag;
-  union
-  {
-    ArgumentImplicitlyIs_Body argument_implicitly_is;
-    ArgumentIs_Body argument_is;
-    ArgumentNamed_Body argument_named;
-  };
-};
-
-/// Range inside of a `Span` used for diagnostics when we only have access to
-/// relative positions.
-struct InnerSpan
-{
-  uintptr_t start;
-  uintptr_t end;
-};
-
-/// A count is used for the precision and width parameters of an integer, and
-/// can reference either an argument or a literal integer.
-struct Count
-{
-  enum class Tag
-  {
-    /// The count is specified explicitly.
-    CountIs,
-    /// The count is specified by the argument with the given name.
-    CountIsName,
-    /// The count is specified by the argument at the given index.
-    CountIsParam,
-    /// The count is specified by a star (like in `{:.*}`) that refers to the
-    /// argument at the given index.
-    CountIsStar,
-    /// The count is implied and cannot be explicitly specified.
-    CountImplied,
-  };
-
-  struct CountIs_Body
-  {
-    uintptr_t _0;
-  };
-
-  struct CountIsName_Body
-  {
-    const str *_0;
-    InnerSpan _1;
-  };
-
-  struct CountIsParam_Body
-  {
-    uintptr_t _0;
-  };
-
-  struct CountIsStar_Body
-  {
-    uintptr_t _0;
-  };
-
-  Tag tag;
-  union
-  {
-    CountIs_Body count_is;
-    CountIsName_Body count_is_name;
-    CountIsParam_Body count_is_param;
-    CountIsStar_Body count_is_star;
-  };
-};
-
-/// Specification for the formatting of an argument in the format string.
-struct FormatSpec
-{
-  /// Optionally specified character to fill alignment with.
-  Option<uint32_t> fill;
-  /// Span of the optionally specified fill character.
-  Option<InnerSpan> fill_span;
-  /// Optionally specified alignment.
-  Alignment align;
-  /// The `+` or `-` flag.
-  Option<Sign> sign;
-  /// The `#` flag.
-  bool alternate;
-  /// The `0` flag.
-  bool zero_pad;
-  /// The `x` or `X` flag. (Only for `Debug`.)
-  Option<DebugHex> debug_hex;
-  /// The integer precision to use.
-  Count precision;
-  /// The span of the precision formatting flag (for diagnostics).
-  Option<InnerSpan> precision_span;
-  /// The string width requested for the resulting format.
-  Count width;
-  /// The span of the width formatting flag (for diagnostics).
-  Option<InnerSpan> width_span;
-  /// The descriptor string representing the name of the format desired for
-  /// this argument, this can be empty or any number of characters, although
-  /// it is required to be one word.
-  const str *ty;
-  /// The span of the descriptor string (for diagnostics).
-  Option<InnerSpan> ty_span;
-};
-
-/// Representation of an argument specification.
-struct Argument
-{
-  /// Where to find this argument
-  Position position;
-  /// The span of the position indicator. Includes any whitespace in implicit
-  /// positions (`{  }`).
-  InnerSpan position_span;
-  /// How to format the argument
-  FormatSpec format;
-};
-
-/// A piece is a portion of the format string which represents the next part
-/// to emit. These are emitted as a stream by the `Parser` class.
-struct Piece
-{
-  enum class Tag
-  {
-    /// A literal string which should directly be emitted
-    String,
-    /// This describes that formatting should process the next argument (as
-    /// specified inside) for emission.
-    NextArgument,
-  };
-
-  struct String_Body
-  {
-    const str *_0;
-  };
-
-  struct NextArgument_Body
-  {
-    Box<Argument> _0;
-  };
-
-  Tag tag;
-  union
-  {
-    String_Body string;
-    NextArgument_Body next_argument;
-  };
-};
-
-struct PieceSlice
-{
-  const Piece *base_ptr;
-  uintptr_t len;
-};
-
-extern "C" {
-
-PieceSlice
-collect_pieces (const char *input);
-
-} // extern "C"
diff --git a/libgrust/libformat_parser/src/lib.rs b/libgrust/libformat_parser/src/lib.rs
index 49821e7cd2f4..4bbc468c7557 100644
--- a/libgrust/libformat_parser/src/lib.rs
+++ b/libgrust/libformat_parser/src/lib.rs
@@ -5,8 +5,31 @@
 
 use std::ffi::CStr;
 
+trait IntoFFI {
+    type Output;
+
+    fn into_ffi(&self) -> Self::Output;
+}
+
+impl<T> IntoFFI for Option<T>
+where
+    T: Sized,
+{
+    type Output = *const T;
+
+    fn into_ffi(&self) -> Self::Output {
+        match self.as_ref() {
+            None => std::ptr::null(),
+            Some(r) => r as *const T,
+        }
+    }
+}
+
+// FIXME: Make an ffi module in a separate file
+// FIXME: Remember to leak the boxed type somehow
+// FIXME: How to encode the Option type? As a pointer? Option<T> -> Option<&T> -> *const T could work maybe?
 mod ffi {
-    use std::ops::Deref;
+    use super::IntoFFI;
 
     // Note: copied from rustc_span
     /// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
@@ -102,31 +125,31 @@ mod ffi {
         /// Optionally specified character to fill alignment with.
         pub fill: Option<char>,
         /// Span of the optionally specified fill character.
-        pub fill_span: Option<InnerSpan>,
+        pub fill_span: *const InnerSpan,
         /// Optionally specified alignment.
         pub align: Alignment,
         /// The `+` or `-` flag.
-        pub sign: Option<Sign>,
+        pub sign: *const Sign,
         /// The `#` flag.
         pub alternate: bool,
         /// The `0` flag.
         pub zero_pad: bool,
         /// The `x` or `X` flag. (Only for `Debug`.)
-        pub debug_hex: Option<DebugHex>,
+        pub debug_hex: *const DebugHex,
         /// The integer precision to use.
         pub precision: Count<'a>,
         /// The span of the precision formatting flag (for diagnostics).
-        pub precision_span: Option<InnerSpan>,
+        pub precision_span: *const InnerSpan,
         /// The string width requested for the resulting format.
         pub width: Count<'a>,
         /// The span of the width formatting flag (for diagnostics).
-        pub width_span: Option<InnerSpan>,
+        pub width_span: *const InnerSpan,
         /// The descriptor string representing the name of the format desired for
         /// this argument, this can be empty or any number of characters, although
         /// it is required to be one word.
         pub ty: &'a str,
         /// The span of the descriptor string (for diagnostics).
-        pub ty_span: Option<InnerSpan>,
+        pub ty_span: *const InnerSpan,
     }
 
     /// Enum describing where an argument for a format can be located.
@@ -197,6 +220,11 @@ mod ffi {
             match old {
                 generic_format_parser::Piece::String(x) => Piece::String(x),
                 generic_format_parser::Piece::NextArgument(x) => {
+                    // FIXME: This is problematic - if we do this, then we probably run into the issue that the Box
+                    // is freed at the end of the call to collect_pieces. if we just .leak() it, then we have
+                    // a memory leak... should we resend the info back to the Rust lib afterwards to free it?
+                    // this is definitely the best way - store that pointer in the FFI piece and rebuild the box
+                    // in a Rust destructor
                     Piece::NextArgument(Box::new(Into::<Argument>::into(*x)))
                 }
             }
@@ -240,18 +268,18 @@ mod ffi {
         fn from(old: generic_format_parser::FormatSpec<'a>) -> Self {
             FormatSpec {
                 fill: old.fill,
-                fill_span: old.fill_span.map(Into::into),
+                fill_span: old.fill_span.map(Into::into).into_ffi(),
                 align: old.align.into(),
-                sign: old.sign.map(Into::into),
+                sign: old.sign.map(Into::into).into_ffi(),
                 alternate: old.alternate,
                 zero_pad: old.zero_pad,
-                debug_hex: old.debug_hex.map(Into::into),
+                debug_hex: old.debug_hex.map(Into::into).into_ffi(),
                 precision: old.precision.into(),
-                precision_span: old.precision_span.map(Into::into),
+                precision_span: old.precision_span.map(Into::into).into_ffi(),
                 width: old.width.into(),
-                width_span: old.width_span.map(Into::into),
+                width_span: old.width_span.map(Into::into).into_ffi(),
                 ty: old.ty,
-                ty_span: old.ty_span.map(Into::into),
+                ty_span: old.ty_span.map(Into::into).into_ffi(),
             }
         }
     }
@@ -327,6 +355,8 @@ pub extern "C" fn collect_pieces(input: *const libc::c_char) -> PieceSlice {
         .map(Into::into)
         .collect();
 
+    println!("debug: {:?}, {:?}", pieces.as_ptr(), pieces.len());
+
     PieceSlice {
         base_ptr: pieces.as_ptr(),
         len: pieces.len(),

From c3006f03941ce13233c7bf37fdf71d98c76ef916 Mon Sep 17 00:00:00 2001
From: Arthur Cohen <arthur.cohen@embecosm.com>
Date: Tue, 30 Jan 2024 01:48:13 +0100
Subject: [PATCH 07/13] libformat_parser: Send boxed values across FFI properly

gcc/rust/ChangeLog:

	* ast/rust-fmt.cc (Pieces::~Pieces): Call libformat_parser's release
	function in destructor.
	* ast/rust-fmt.h (struct PieceSlice): Add capacity.
	(destroy_pieces): New.
	(struct Pieces): Add destructor.

libgrust/ChangeLog:

	* libformat_parser/src/lib.rs: Leak Boxes properly for C++ to
	see them, add memory release function.
---
 gcc/rust/ast/rust-fmt.cc             |  4 +-
 gcc/rust/ast/rust-fmt.h              |  9 +++
 libgrust/libformat_parser/src/lib.rs | 94 ++++++++++++++--------------
 3 files changed, 58 insertions(+), 49 deletions(-)

diff --git a/gcc/rust/ast/rust-fmt.cc b/gcc/rust/ast/rust-fmt.cc
index a7c4341c52db..f6ee8a209137 100644
--- a/gcc/rust/ast/rust-fmt.cc
+++ b/gcc/rust/ast/rust-fmt.cc
@@ -34,8 +34,10 @@ Pieces::collect (const std::string &to_parse)
   // auto pieces = std::vector<Piece> (piece_slice.base_ptr,
   // 	     piece_slice.base_ptr + piece_slice.len);
 
-  return Pieces{};
+  return Pieces (piece_slice);
 }
 
+Pieces::~Pieces () { destroy_pieces (slice); }
+
 } // namespace Fmt
 } // namespace Rust
diff --git a/gcc/rust/ast/rust-fmt.h b/gcc/rust/ast/rust-fmt.h
index 7ec9a2a199dd..50aeff6433ee 100644
--- a/gcc/rust/ast/rust-fmt.h
+++ b/gcc/rust/ast/rust-fmt.h
@@ -237,6 +237,7 @@ struct PieceSlice
 {
   const Piece *base_ptr;
   size_t len;
+  size_t cap;
 };
 
 extern "C" {
@@ -244,11 +245,19 @@ extern "C" {
 PieceSlice
 collect_pieces (const char *input);
 
+void destroy_pieces (PieceSlice);
+
 } // extern "C"
 
 struct Pieces
 {
   static Pieces collect (const std::string &to_parse);
+  ~Pieces ();
+
+private:
+  Pieces (PieceSlice slice) : slice (slice) {}
+
+  PieceSlice slice;
 };
 
 } // namespace Fmt
diff --git a/libgrust/libformat_parser/src/lib.rs b/libgrust/libformat_parser/src/lib.rs
index 4bbc468c7557..9b2bffed05d4 100644
--- a/libgrust/libformat_parser/src/lib.rs
+++ b/libgrust/libformat_parser/src/lib.rs
@@ -3,21 +3,17 @@
 // what's the plan? Have a function return something that can be constructed into a vector?
 // or an iterator?
 
-use std::ffi::CStr;
+use std::{ffi::CStr, mem};
 
-trait IntoFFI {
-    type Output;
-
-    fn into_ffi(&self) -> Self::Output;
+trait IntoFFI<T> {
+    fn into_ffi(self) -> T;
 }
 
-impl<T> IntoFFI for Option<T>
+impl<T> IntoFFI<*const T> for Option<T>
 where
     T: Sized,
 {
-    type Output = *const T;
-
-    fn into_ffi(&self) -> Self::Output {
+    fn into_ffi(self) -> *const T {
         match self.as_ref() {
             None => std::ptr::null(),
             Some(r) => r as *const T,
@@ -40,12 +36,6 @@ mod ffi {
         pub end: usize,
     }
 
-    // impl InnerSpan {
-    //     pub fn new(start: usize, end: usize) -> InnerSpan {
-    //         InnerSpan { start, end }
-    //     }
-    // }
-
     /// The location and before/after width of a character whose width has changed from its source code
     /// representation
     #[derive(Copy, Clone, PartialEq, Eq)]
@@ -59,35 +49,27 @@ mod ffi {
         pub after: usize,
     }
 
-    // impl InnerWidthMapping {
-    //     pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
-    //         InnerWidthMapping {
-    //             position,
-    //             before,
-    //             after,
-    //         }
-    //     }
+    // TODO: Not needed for now?
+    // /// Whether the input string is a literal. If yes, it contains the inner width mappings.
+    // #[derive(Clone, PartialEq, Eq)]
+    // #[repr(C)]
+    // enum InputStringKind {
+    //     NotALiteral,
+    //     Literal {
+    //         width_mappings: Vec<InnerWidthMapping>,
+    //     },
     // }
 
-    /// Whether the input string is a literal. If yes, it contains the inner width mappings.
-    #[derive(Clone, PartialEq, Eq)]
-    #[repr(C)]
-    enum InputStringKind {
-        NotALiteral,
-        Literal {
-            width_mappings: Vec<InnerWidthMapping>,
-        },
-    }
-
-    /// The type of format string that we are parsing.
-    #[derive(Copy, Clone, Debug, Eq, PartialEq)]
-    #[repr(C)]
-    pub enum ParseMode {
-        /// A normal format string as per `format_args!`.
-        Format,
-        /// An inline assembly template string for `asm!`.
-        InlineAsm,
-    }
+    // TODO: Not needed for now?
+    // /// The type of format string that we are parsing.
+    // #[derive(Copy, Clone, Debug, Eq, PartialEq)]
+    // #[repr(C)]
+    // pub enum ParseMode {
+    //     /// A normal format string as per `format_args!`.
+    //     Format,
+    //     /// An inline assembly template string for `asm!`.
+    //     InlineAsm,
+    // }
 
     #[derive(Copy, Clone)]
     #[repr(C)]
@@ -102,7 +84,13 @@ mod ffi {
         String(&'a str),
         /// This describes that formatting should process the next argument (as
         /// specified inside) for emission.
-        NextArgument(Box<Argument<'a>>),
+        NextArgument(*const Argument<'a>),
+    }
+
+    impl<'a> Drop for Piece<'a> {
+        fn drop(&mut self) {
+            println!("dropping Piece: {:?}", self)
+        }
     }
 
     /// Representation of an argument specification.
@@ -225,7 +213,10 @@ mod ffi {
                     // a memory leak... should we resend the info back to the Rust lib afterwards to free it?
                     // this is definitely the best way - store that pointer in the FFI piece and rebuild the box
                     // in a Rust destructor
-                    Piece::NextArgument(Box::new(Into::<Argument>::into(*x)))
+                    // Move the converted argument to the heap: a pointer to a local would dangle once
+                    // this function returns. `Box::into_raw` hands ownership to the raw pointer, so the
+                    // allocation must later be reclaimed with `Box::from_raw`, otherwise it is leaked.
+                    Piece::NextArgument(Box::into_raw(Box::new(Into::<Argument>::into(*x))) as *const Argument)
                 }
             }
         }
@@ -331,17 +322,18 @@ pub mod rust {
     use generic_format_parser::{ParseMode, Parser, Piece};
 
     pub fn collect_pieces(input: &str) -> Vec<Piece<'_>> {
-        // let parser = Parser::new();
         let parser = Parser::new(input, None, None, true, ParseMode::Format);
 
         parser.into_iter().collect()
     }
 }
 
+// TODO: Should we instead make an FFIVector struct?
 #[repr(C)]
 pub struct PieceSlice {
-    base_ptr: *const ffi::Piece<'static /* FIXME: That's wrong */>,
+    base_ptr: *mut ffi::Piece<'static /* FIXME: That's wrong */>,
     len: usize,
+    cap: usize,
 }
 
 #[no_mangle]
@@ -355,10 +347,16 @@ pub extern "C" fn collect_pieces(input: *const libc::c_char) -> PieceSlice {
         .map(Into::into)
         .collect();
 
-    println!("debug: {:?}, {:?}", pieces.as_ptr(), pieces.len());
+    println!("[ARTHUR]: debug: {:?}, {:?}", pieces.as_ptr(), pieces.len());
 
     PieceSlice {
-        base_ptr: pieces.as_ptr(),
         len: pieces.len(),
+        cap: pieces.capacity(),
+        base_ptr: pieces.leak().as_mut_ptr(),
     }
 }
+
+#[no_mangle]
+pub extern "C" fn destroy_pieces(PieceSlice { base_ptr, len, cap }: PieceSlice) {
+    let _ = unsafe { Vec::from_raw_parts(base_ptr, len, cap) };
+}

From dc76d45a8a4916c091f7afdd0cdb45681657e930 Mon Sep 17 00:00:00 2001
From: Arthur Cohen <arthur.cohen@embecosm.com>
Date: Tue, 30 Jan 2024 16:16:36 +0100
Subject: [PATCH 08/13] format_args: Parse format string properly

gcc/rust/ChangeLog:

	* expand/rust-macro-builtins.cc (MacroBuiltin::format_args_handler):
	Construct string to parser properly.
---
 gcc/rust/expand/rust-macro-builtins.cc | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/gcc/rust/expand/rust-macro-builtins.cc b/gcc/rust/expand/rust-macro-builtins.cc
index 0e57406f10f8..19ea91094539 100644
--- a/gcc/rust/expand/rust-macro-builtins.cc
+++ b/gcc/rust/expand/rust-macro-builtins.cc
@@ -947,7 +947,24 @@ tl::optional<AST::Fragment>
 MacroBuiltin::format_args_handler (location_t invoc_locus,
 				   AST::MacroInvocData &invoc)
 {
-  Fmt::Pieces::collect ("heyo this {is} what I {} want to {3}, {parse}");
+  auto fmt_expr
+    = parse_single_string_literal (BuiltinMacro::FormatArgs,
+				   invoc.get_delim_tok_tree (), invoc_locus,
+				   invoc.get_expander ());
+
+  if (!fmt_expr)
+    return AST::Fragment::create_error ();
+
+  // if it is not a literal, it's an eager macro invocation - return it
+  if (!fmt_expr->is_literal ())
+    {
+      auto token_tree = invoc.get_delim_tok_tree ();
+      return AST::Fragment ({AST::SingleASTNode (std::move (fmt_expr))},
+			    token_tree.to_token_stream ());
+    }
+
+  auto format_string = fmt_expr->as_string ();
+  auto pieces = Fmt::Pieces::collect (format_string);
 
   return AST::Fragment::create_empty ();
 }

From 7a556de4061e384dce6b847e3c8ab98c3b33dde7 Mon Sep 17 00:00:00 2001
From: Arthur Cohen <arthur.cohen@embecosm.com>
Date: Wed, 7 Feb 2024 12:46:16 +0100
Subject: [PATCH 09/13] format_args: Parse entire token invocation

gcc/rust/ChangeLog:

	* expand/rust-macro-builtins.cc (MacroBuiltin::format_args_handler):
	Transform entire invocation token stream into string for the parser.
---
 gcc/rust/expand/rust-macro-builtins.cc | 40 ++++++++++++++------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/gcc/rust/expand/rust-macro-builtins.cc b/gcc/rust/expand/rust-macro-builtins.cc
index 19ea91094539..2af05a5e3777 100644
--- a/gcc/rust/expand/rust-macro-builtins.cc
+++ b/gcc/rust/expand/rust-macro-builtins.cc
@@ -16,6 +16,8 @@
 // along with GCC; see the file COPYING3.  If not see
 // <http://www.gnu.org/licenses/>.
 
+#include "libproc_macro_internal/tokenstream.h"
+#include "rust-token-converter.h"
 #include "rust-system.h"
 #include "rust-macro-builtins.h"
 #include "rust-ast-fragment.h"
@@ -947,24 +949,26 @@ tl::optional<AST::Fragment>
 MacroBuiltin::format_args_handler (location_t invoc_locus,
 				   AST::MacroInvocData &invoc)
 {
-  auto fmt_expr
-    = parse_single_string_literal (BuiltinMacro::FormatArgs,
-				   invoc.get_delim_tok_tree (), invoc_locus,
-				   invoc.get_expander ());
-
-  if (!fmt_expr)
-    return AST::Fragment::create_error ();
-
-  // if it is not a literal, it's an eager macro invocation - return it
-  if (!fmt_expr->is_literal ())
-    {
-      auto token_tree = invoc.get_delim_tok_tree ();
-      return AST::Fragment ({AST::SingleASTNode (std::move (fmt_expr))},
-			    token_tree.to_token_stream ());
-    }
-
-  auto format_string = fmt_expr->as_string ();
-  auto pieces = Fmt::Pieces::collect (format_string);
+  auto tokens = invoc.get_delim_tok_tree ().to_token_stream ();
+  tokens.erase (tokens.begin ());
+  tokens.pop_back ();
+
+  std::stringstream stream;
+  for (const auto &tok : tokens)
+    stream << tok->as_string () << ' ';
+
+  rust_debug ("[ARTHU]: `%s`", stream.str ().c_str ());
+
+  // FIXME: We need to handle this
+  // // if it is not a literal, it's an eager macro invocation - return it
+  // if (!fmt_expr->is_literal ())
+  //   {
+  //     auto token_tree = invoc.get_delim_tok_tree ();
+  //     return AST::Fragment ({AST::SingleASTNode (std::move (fmt_expr))},
+  // 	    token_tree.to_token_stream ());
+  //   }
+
+  auto pieces = Fmt::Pieces::collect (stream.str ());
 
   return AST::Fragment::create_empty ();
 }

From a829fc4acf82c62c1d1e3cf9aec871900712c3eb Mon Sep 17 00:00:00 2001
From: Arthur Cohen <arthur.cohen@embecosm.com>
Date: Tue, 13 Feb 2024 16:31:25 +0100
Subject: [PATCH 10/13] rust-fmt: Store parsed string in Pieces struct

gcc/rust/ChangeLog:

	* ast/rust-fmt.cc (Pieces::collect): Fix signature to take ownership
	of the given string.
	* ast/rust-fmt.h (struct Pieces): Store parsed string in the struct.

libgrust/ChangeLog:

	* libformat_parser/src/lib.rs: Add debug prompt.
---
 gcc/rust/ast/rust-fmt.cc             | 4 ++--
 gcc/rust/ast/rust-fmt.h              | 7 +++++--
 libgrust/libformat_parser/src/lib.rs | 1 +
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/gcc/rust/ast/rust-fmt.cc b/gcc/rust/ast/rust-fmt.cc
index f6ee8a209137..511e94740c5e 100644
--- a/gcc/rust/ast/rust-fmt.cc
+++ b/gcc/rust/ast/rust-fmt.cc
@@ -23,7 +23,7 @@ namespace Rust {
 namespace Fmt {
 
 Pieces
-Pieces::collect (const std::string &to_parse)
+Pieces::collect (std::string &&to_parse)
 {
   auto piece_slice = collect_pieces (to_parse.c_str ());
 
@@ -34,7 +34,7 @@ Pieces::collect (const std::string &to_parse)
   // auto pieces = std::vector<Piece> (piece_slice.base_ptr,
   // 	     piece_slice.base_ptr + piece_slice.len);
 
-  return Pieces (piece_slice);
+  return Pieces (piece_slice, std::move (to_parse));
 }
 
 Pieces::~Pieces () { destroy_pieces (slice); }
diff --git a/gcc/rust/ast/rust-fmt.h b/gcc/rust/ast/rust-fmt.h
index 50aeff6433ee..0bf9695bb6d2 100644
--- a/gcc/rust/ast/rust-fmt.h
+++ b/gcc/rust/ast/rust-fmt.h
@@ -251,13 +251,16 @@ void destroy_pieces (PieceSlice);
 
 struct Pieces
 {
-  static Pieces collect (const std::string &to_parse);
+  static Pieces collect (std::string &&to_parse);
   ~Pieces ();
 
 private:
-  Pieces (PieceSlice slice) : slice (slice) {}
+  Pieces (PieceSlice slice, std::string &&to_parse)
+    : slice (slice), to_parse (std::move (to_parse))
+  {}
 
   PieceSlice slice;
+  std::string to_parse;
 };
 
 } // namespace Fmt
diff --git a/libgrust/libformat_parser/src/lib.rs b/libgrust/libformat_parser/src/lib.rs
index 9b2bffed05d4..eb3e1060e5d8 100644
--- a/libgrust/libformat_parser/src/lib.rs
+++ b/libgrust/libformat_parser/src/lib.rs
@@ -340,6 +340,7 @@ pub struct PieceSlice {
 pub extern "C" fn collect_pieces(input: *const libc::c_char) -> PieceSlice {
     // FIXME: Add comment
     let str = unsafe { CStr::from_ptr(input) };
+    dbg!(str);
 
     // FIXME: No unwrap
     let pieces: Vec<ffi::Piece<'_>> = rust::collect_pieces(str.to_str().unwrap())

From 3cd6cd76b570af74bafdd277368f0b717dda597b Mon Sep 17 00:00:00 2001
From: Arthur Cohen <arthur.cohen@embecosm.com>
Date: Thu, 15 Feb 2024 13:11:26 +0100
Subject: [PATCH 11/13] libformat_parser: Fix Rust warnings.

libgrust/ChangeLog:

	* libformat_parser/generic_format_parser/src/lib.rs: Remove
	unused deprecated attribute and unused import.
	* libformat_parser/src/lib.rs: Remove unused import.
---
 libgrust/libformat_parser/generic_format_parser/src/lib.rs | 2 --
 libgrust/libformat_parser/src/lib.rs                       | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/libgrust/libformat_parser/generic_format_parser/src/lib.rs b/libgrust/libformat_parser/generic_format_parser/src/lib.rs
index 87a20dc18c56..6a366177f252 100644
--- a/libgrust/libformat_parser/generic_format_parser/src/lib.rs
+++ b/libgrust/libformat_parser/generic_format_parser/src/lib.rs
@@ -14,12 +14,10 @@
 // WARNING: We want to be able to build this crate with a stable compiler,
 //          so no `#![feature]` attributes should be added!
 
-#[deprecated(note = "Use a proper lexer function for this")]
 fn is_id_start(c: char) -> bool {
     c == '_' || unicode_xid::UnicodeXID::is_xid_start(c)
 }
 
-#[deprecated(note = "Use a proper lexer function for this")]
 fn is_id_continue(c: char) -> bool {
     unicode_xid::UnicodeXID::is_xid_continue(c)
 }
diff --git a/libgrust/libformat_parser/src/lib.rs b/libgrust/libformat_parser/src/lib.rs
index eb3e1060e5d8..c164578a1039 100644
--- a/libgrust/libformat_parser/src/lib.rs
+++ b/libgrust/libformat_parser/src/lib.rs
@@ -3,7 +3,7 @@
 // what's the plan? Have a function return something that can be constructed into a vector?
 // or an iterator?
 
-use std::{ffi::CStr, mem};
+use std::ffi::CStr;
 
 trait IntoFFI<T> {
     fn into_ffi(self) -> T;

From a32eeae202f00488ccb60ea367aecc05f85b1e36 Mon Sep 17 00:00:00 2001
From: Arthur Cohen <arthur.cohen@embecosm.com>
Date: Mon, 26 Feb 2024 11:55:47 +0100
Subject: [PATCH 12/13] format-parser: Add `is_some_and` method for Option<T>

Workaround for Ubuntu 18.04, since we still use it for the GCC 4.8 CI.
The default Rust package is 1.65 (and unlikely to change I assume?),
but the generic format parser library uses `is_some_and` which was
introduced in 1.70. So this is a simple reimplementation, directly taken
from the standard library sources.

libgrust/ChangeLog:

	* libformat_parser/generic_format_parser/src/lib.rs: Add IsSomeAnd<T>
	trait, impl it for Option<T>.
---
 .../generic_format_parser/src/lib.rs             | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/libgrust/libformat_parser/generic_format_parser/src/lib.rs b/libgrust/libformat_parser/generic_format_parser/src/lib.rs
index 6a366177f252..8062bf9e5cec 100644
--- a/libgrust/libformat_parser/generic_format_parser/src/lib.rs
+++ b/libgrust/libformat_parser/generic_format_parser/src/lib.rs
@@ -22,6 +22,22 @@ fn is_id_continue(c: char) -> bool {
     unicode_xid::UnicodeXID::is_xid_continue(c)
 }
 
+// Workaround for Ubuntu 18.04. The default Rust package is 1.65 (and unlikely to change I assume?), but the
+// generic format parser library uses `is_some_and` which was introduced in 1.70. So this is a reimplementation,
+// directly taken from the standard library sources
+trait IsSomeAnd<T> {
+    fn is_some_and(self, f: impl FnOnce(T) -> bool) -> bool;
+}
+
+impl<T> IsSomeAnd<T> for Option<T> {
+    fn is_some_and(self, f: impl FnOnce(T) -> bool) -> bool {
+        match self {
+            None => false,
+            Some(x) => f(x),
+        }
+    }
+}
+
 // use rustc_lexer::unescape;
 pub use Alignment::*;
 pub use Count::*;

From 7d2d63900d0cf9c605b968aca5f51c94ced20579 Mon Sep 17 00:00:00 2001
From: Arthur Cohen <arthur.cohen@embecosm.com>
Date: Mon, 26 Feb 2024 11:57:54 +0100
Subject: [PATCH 13/13] ci: Install cargo on ubuntu 18.04 container.

ChangeLog:

	* .github/workflows/ccpp.yml: Install cargo for GCC 4.8 job.
---
 .github/workflows/ccpp.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml
index 34908b6eec86..10a909019fbd 100644
--- a/.github/workflows/ccpp.yml
+++ b/.github/workflows/ccpp.yml
@@ -192,7 +192,8 @@ jobs:
                   g++-4.8 \
                   gcc-4.8-multilib \
                   g++-4.8-multilib \
-                  dejagnu
+                  dejagnu \
+                  cargo
 
     - name: Configure
       run: |