From a34578e05198dfac0a6bfee385c0ab7f2b1f5f0c Mon Sep 17 00:00:00 2001
From: Jake Lishman
Date: Sun, 25 Aug 2024 17:54:43 +0100
Subject: [PATCH] Move interners from trait to generic structs

This rewrites the interner mechanisms to be defined in terms of two generic
structs (`Interner` and `Interned`) that formalise the idea that the interned
values are owned versions of a given reference type, and moves the logically
separate intern-related actions (getting the value back from a key, and getting
a key for a value) into separate functions (`get` and `insert`, respectively).

This has a few advantages:

1. We now have both `insert` and `insert_owned` methods, which was awkward to
   add within the trait-based structure.  This allows a more efficient path
   when the owned variant has already necessarily been constructed.

2. Additionally, the standard `insert` path now takes only a reference type.
   For large circuits, most intern lookups will, in general, find a
   pre-existing key, so in situations where the interned value is sufficiently
   small that it can sit within a static allocation (which is the case for
   almost all `qargs` and `cargs`), it is more efficient not to construct the
   owned type on the heap.

3. The type of the values retrieved from an interner is no longer indirected
   through the owned type that is stored.  For example, where
   `IndexedInterner<Vec<Qubit>>` gave out `&Vec<Qubit>`s as its lookups,
   `Interner<[Qubit]>` returns the more standard `&[Qubit]`, which is only
   singly indirect rather than doubly.

The following replacements are made:

1. The `IndexedInterner` struct from before is now just called `Interner` (and
   internally, it uses an `IndexSet` rather than manually tracking the
   indices).  Its generic type is related to the references it returns, rather
   than the owned value it stores, so `IndexedInterner<Vec<Qubit>>` becomes
   `Interner<[Qubit]>`.

2. The `Interner` trait is now gone.  Everything is just accessed as concrete
   methods on the `Interner` struct.

3. `<&IndexedInterner as Interner>::intern` (lookup of the value from an
   interner key) is now called `Interner::get`.

4. `<&mut IndexedInterner as Interner>::intern` (conversion of an owned value
   to an interner key) is now called `Interner::insert_owned`.

5. A new method, `Interner::insert`, can now be used when one does not already
   have an owned allocation of the storage type; the correct value will be
   allocated only if required (which is expected to be the less frequent case).

6. The intern key is no longer called `interner::Index`, but `Interned<T>`,
   where the generic parameter `T` matches the generic of the `Interner<T>`
   that gave out the key.
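
As a quick illustration of the new calling conventions described above (a
minimal sketch only, not code from this patch; it assumes crate-internal access
and that `Qubit` is the existing `u32` newtype used throughout the diff):

    // Keys are now `Interned<[Qubit]>` rather than `interner::Index`.
    let mut qargs_interner = Interner::<[Qubit]>::new();

    // `insert` takes a borrowed slice; no owned `Vec<Qubit>` needs to be built
    // up front when the entry is (as is typical) already present.
    let key = qargs_interner.insert(&[Qubit(0), Qubit(1)]);

    // `insert_owned` takes the owned storage type directly, for the cases
    // where it has already been constructed anyway.
    let same_key = qargs_interner.insert_owned(vec![Qubit(0), Qubit(1)]);
    assert_eq!(key, same_key);

    // `get` hands back `&[Qubit]` (singly indirect), not `&Vec<Qubit>`.
    let qubits: &[Qubit] = qargs_interner.get(key);
    assert_eq!(qubits.len(), 2);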
--- crates/circuit/src/circuit_data.rs | 56 ++++---- crates/circuit/src/dag_circuit.rs | 146 +++++++++---------- crates/circuit/src/interner.rs | 176 ++++++++++++++++------- crates/circuit/src/packed_instruction.rs | 6 +- 4 files changed, 220 insertions(+), 164 deletions(-) diff --git a/crates/circuit/src/circuit_data.rs b/crates/circuit/src/circuit_data.rs index 4dd3956bee6a..00043c23e51b 100644 --- a/crates/circuit/src/circuit_data.rs +++ b/crates/circuit/src/circuit_data.rs @@ -16,7 +16,7 @@ use std::cell::OnceCell; use crate::bit_data::BitData; use crate::circuit_instruction::{CircuitInstruction, OperationFromPython}; use crate::imports::{ANNOTATED_OPERATION, CLBIT, QUANTUM_CIRCUIT, QUBIT}; -use crate::interner::{IndexedInterner, Interner}; +use crate::interner::Interner; use crate::operations::{Operation, OperationRef, Param, StandardGate}; use crate::packed_instruction::{PackedInstruction, PackedOperation}; use crate::parameter_table::{ParameterTable, ParameterTableError, ParameterUse, ParameterUuid}; @@ -91,9 +91,9 @@ pub struct CircuitData { /// The packed instruction listing. data: Vec, /// The cache used to intern instruction bits. - qargs_interner: IndexedInterner>, + qargs_interner: Interner<[Qubit]>, /// The cache used to intern instruction bits. - cargs_interner: IndexedInterner>, + cargs_interner: Interner<[Clbit]>, /// Qubits registered in the circuit. qubits: BitData, /// Clbits registered in the circuit. @@ -148,8 +148,8 @@ impl CircuitData { global_phase, )?; for (operation, params, qargs, cargs) in instruction_iter { - let qubits = (&mut res.qargs_interner).intern(qargs)?; - let clbits = (&mut res.cargs_interner).intern(cargs)?; + let qubits = res.qargs_interner.insert_owned(qargs); + let clbits = res.cargs_interner.insert_owned(cargs); let params = (!params.is_empty()).then(|| Box::new(params)); res.data.push(PackedInstruction { op: operation, @@ -199,9 +199,9 @@ impl CircuitData { instruction_iter.size_hint().0, global_phase, )?; - let no_clbit_index = (&mut res.cargs_interner).intern(Vec::new())?; + let no_clbit_index = res.cargs_interner.insert(&[]); for (operation, params, qargs) in instruction_iter { - let qubits = (&mut res.qargs_interner).intern(qargs.to_vec())?; + let qubits = res.qargs_interner.insert(&qargs); let params = (!params.is_empty()).then(|| Box::new(params)); res.data.push(PackedInstruction { op: operation.into(), @@ -227,8 +227,8 @@ impl CircuitData { ) -> PyResult { let mut res = CircuitData { data: Vec::with_capacity(instruction_capacity), - qargs_interner: IndexedInterner::new(), - cargs_interner: IndexedInterner::new(), + qargs_interner: Interner::new(), + cargs_interner: Interner::new(), qubits: BitData::new(py, "qubits".to_string()), clbits: BitData::new(py, "clbits".to_string()), param_table: ParameterTable::new(), @@ -258,9 +258,9 @@ impl CircuitData { params: &[Param], qargs: &[Qubit], ) -> PyResult<()> { - let no_clbit_index = (&mut self.cargs_interner).intern(Vec::new())?; + let no_clbit_index = self.cargs_interner.insert(&[]); let params = (!params.is_empty()).then(|| Box::new(params.iter().cloned().collect())); - let qubits = (&mut self.qargs_interner).intern(qargs.to_vec())?; + let qubits = self.qargs_interner.insert(qargs); self.data.push(PackedInstruction { op: operation.into(), qubits, @@ -351,8 +351,8 @@ impl CircuitData { ) -> PyResult { let mut self_ = CircuitData { data: Vec::new(), - qargs_interner: IndexedInterner::new(), - cargs_interner: IndexedInterner::new(), + qargs_interner: Interner::new(), + cargs_interner: 
Interner::new(), qubits: BitData::new(py, "qubits".to_string()), clbits: BitData::new(py, "clbits".to_string()), param_table: ParameterTable::new(), @@ -572,10 +572,10 @@ impl CircuitData { let qubits = PySet::empty_bound(py)?; let clbits = PySet::empty_bound(py)?; for inst in self.data.iter() { - for b in self.qargs_interner.intern(inst.qubits) { + for b in self.qargs_interner.get(inst.qubits) { qubits.add(self.qubits.get(*b).unwrap().clone_ref(py))?; } - for b in self.cargs_interner.intern(inst.clbits) { + for b in self.cargs_interner.get(inst.clbits) { clbits.add(self.clbits.get(*b).unwrap().clone_ref(py))?; } } @@ -737,8 +737,8 @@ impl CircuitData { // Get a single item, assuming the index is validated as in bounds. let get_single = |index: usize| { let inst = &self.data[index]; - let qubits = self.qargs_interner.intern(inst.qubits); - let clbits = self.cargs_interner.intern(inst.clbits); + let qubits = self.qargs_interner.get(inst.qubits); + let clbits = self.cargs_interner.get(inst.clbits); CircuitInstruction { operation: inst.op.clone(), qubits: PyTuple::new_bound(py, self.qubits.map_indices(qubits)).unbind(), @@ -894,7 +894,7 @@ impl CircuitData { for inst in other.data.iter() { let qubits = other .qargs_interner - .intern(inst.qubits) + .get(inst.qubits) .iter() .map(|b| { Ok(self @@ -905,7 +905,7 @@ impl CircuitData { .collect::>>()?; let clbits = other .cargs_interner - .intern(inst.clbits) + .get(inst.clbits) .iter() .map(|b| { Ok(self @@ -915,8 +915,8 @@ impl CircuitData { }) .collect::>>()?; let new_index = self.data.len(); - let qubits_id = Interner::intern(&mut self.qargs_interner, qubits)?; - let clbits_id = Interner::intern(&mut self.cargs_interner, clbits)?; + let qubits_id = self.qargs_interner.insert_owned(qubits); + let clbits_id = self.cargs_interner.insert_owned(clbits); self.data.push(PackedInstruction { op: inst.op.clone(), qubits: qubits_id, @@ -1113,14 +1113,12 @@ impl CircuitData { } fn pack(&mut self, py: Python, inst: &CircuitInstruction) -> PyResult { - let qubits = Interner::intern( - &mut self.qargs_interner, - self.qubits.map_bits(inst.qubits.bind(py))?.collect(), - )?; - let clbits = Interner::intern( - &mut self.cargs_interner, - self.clbits.map_bits(inst.clbits.bind(py))?.collect(), - )?; + let qubits = self + .qargs_interner + .insert_owned(self.qubits.map_bits(inst.qubits.bind(py))?.collect()); + let clbits = self + .cargs_interner + .insert_owned(self.clbits.map_bits(inst.clbits.bind(py))?.collect()); Ok(PackedInstruction { op: inst.operation.clone(), qubits, diff --git a/crates/circuit/src/dag_circuit.rs b/crates/circuit/src/dag_circuit.rs index bb5fff5e343a..1ca69c5b3a85 100644 --- a/crates/circuit/src/dag_circuit.rs +++ b/crates/circuit/src/dag_circuit.rs @@ -22,7 +22,7 @@ use crate::dag_node::{DAGInNode, DAGNode, DAGOpNode, DAGOutNode}; use crate::dot_utils::build_dot; use crate::error::DAGCircuitError; use crate::imports; -use crate::interner::{IndexedInterner, Interner}; +use crate::interner::Interner; use crate::operations::{Operation, OperationRef, Param, PyInstruction}; use crate::packed_instruction::PackedInstruction; use crate::rustworkx_core_vnext::isomorphism; @@ -231,9 +231,9 @@ pub struct DAGCircuit { cregs: Py, /// The cache used to intern instruction qargs. - qargs_cache: IndexedInterner>, + qargs_cache: Interner<[Qubit]>, /// The cache used to intern instruction cargs. - cargs_cache: IndexedInterner>, + cargs_cache: Interner<[Clbit]>, /// Qubits registered in the circuit. pub qubits: BitData, /// Clbits registered in the circuit. 
@@ -415,8 +415,8 @@ impl DAGCircuit { dag: StableDiGraph::default(), qregs: PyDict::new_bound(py).unbind(), cregs: PyDict::new_bound(py).unbind(), - qargs_cache: IndexedInterner::new(), - cargs_cache: IndexedInterner::new(), + qargs_cache: Interner::new(), + cargs_cache: Interner::new(), qubits: BitData::new(py, "qubits".to_string()), clbits: BitData::new(py, "clbits".to_string()), global_phase: Param::Float(0.), @@ -1210,13 +1210,11 @@ def _format(operand): for node_weight in self.dag.node_weights_mut() { match node_weight { NodeType::Operation(op) => { - let cargs = self.cargs_cache.intern(op.clbits); - let carg_bits = old_clbits - .map_indices(&cargs[..]) - .map(|b| b.bind(py).clone()); - let mapped_cargs = self.clbits.map_bits(carg_bits)?.collect(); - let clbits = Interner::intern(&mut self.cargs_cache, mapped_cargs)?; - op.clbits = clbits; + let cargs = self.cargs_cache.get(op.clbits); + let carg_bits = old_clbits.map_indices(cargs).map(|b| b.bind(py).clone()); + op.clbits = self + .cargs_cache + .insert_owned(self.clbits.map_bits(carg_bits)?.collect()); } NodeType::ClbitIn(c) | NodeType::ClbitOut(c) => { *c = self @@ -1420,13 +1418,11 @@ def _format(operand): for node_weight in self.dag.node_weights_mut() { match node_weight { NodeType::Operation(op) => { - let qargs = self.qargs_cache.intern(op.qubits); - let qarg_bits = old_qubits - .map_indices(&qargs[..]) - .map(|b| b.bind(py).clone()); - let mapped_qargs = self.qubits.map_bits(qarg_bits)?.collect(); - let qubits = Interner::intern(&mut self.qargs_cache, mapped_qargs)?; - op.qubits = qubits; + let qargs = self.qargs_cache.get(op.qubits); + let qarg_bits = old_qubits.map_indices(qargs).map(|b| b.bind(py).clone()); + op.qubits = self + .qargs_cache + .insert_owned(self.qubits.map_bits(qarg_bits)?.collect()); } NodeType::QubitIn(q) | NodeType::QubitOut(q) => { *q = self @@ -1677,14 +1673,12 @@ def _format(operand): let qargs = qargs.map(|q| q.value); let cargs = cargs.map(|c| c.value); let node = { - let qubits_id = Interner::intern( - &mut self.qargs_cache, - self.qubits.map_bits(qargs.iter().flatten())?.collect(), - )?; - let clbits_id = Interner::intern( - &mut self.cargs_cache, - self.clbits.map_bits(cargs.iter().flatten())?.collect(), - )?; + let qubits_id = self + .qargs_cache + .insert_owned(self.qubits.map_bits(qargs.iter().flatten())?.collect()); + let clbits_id = self + .cargs_cache + .insert_owned(self.clbits.map_bits(cargs.iter().flatten())?.collect()); let instr = PackedInstruction { op: py_op.operation, qubits: qubits_id, @@ -1733,14 +1727,12 @@ def _format(operand): let qargs = qargs.map(|q| q.value); let cargs = cargs.map(|c| c.value); let node = { - let qubits_id = Interner::intern( - &mut self.qargs_cache, - self.qubits.map_bits(qargs.iter().flatten())?.collect(), - )?; - let clbits_id = Interner::intern( - &mut self.cargs_cache, - self.clbits.map_bits(cargs.iter().flatten())?.collect(), - )?; + let qubits_id = self + .qargs_cache + .insert_owned(self.qubits.map_bits(qargs.iter().flatten())?.collect()); + let clbits_id = self + .cargs_cache + .insert_owned(self.clbits.map_bits(cargs.iter().flatten())?.collect()); let instr = PackedInstruction { op: py_op.operation, qubits: qubits_id, @@ -1991,9 +1983,7 @@ def _format(operand): } NodeType::Operation(op) => { let m_qargs = { - let qubits = other - .qubits - .map_indices(other.qargs_cache.intern(op.qubits).as_slice()); + let qubits = other.qubits.map_indices(other.qargs_cache.get(op.qubits)); let mut mapped = Vec::with_capacity(qubits.len()); for bit in qubits { 
mapped.push( @@ -2005,9 +1995,7 @@ def _format(operand): PyTuple::new_bound(py, mapped) }; let m_cargs = { - let clbits = other - .clbits - .map_indices(other.cargs_cache.intern(op.clbits).as_slice()); + let clbits = other.clbits.map_indices(other.cargs_cache.get(op.clbits)); let mut mapped = Vec::with_capacity(clbits.len()); for bit in clbits { mapped.push( @@ -2471,10 +2459,10 @@ def _format(operand): return Ok(false); } let check_args = || -> bool { - let node1_qargs = self.qargs_cache.intern(inst1.qubits); - let node2_qargs = other.qargs_cache.intern(inst2.qubits); - let node1_cargs = self.cargs_cache.intern(inst1.clbits); - let node2_cargs = other.cargs_cache.intern(inst2.clbits); + let node1_qargs = self.qargs_cache.get(inst1.qubits); + let node2_qargs = other.qargs_cache.get(inst2.qubits); + let node1_cargs = self.cargs_cache.get(inst1.clbits); + let node2_cargs = other.cargs_cache.get(inst2.clbits); if SEMANTIC_EQ_SYMMETRIC.contains(&inst1.op.name()) { let node1_qargs = node1_qargs.iter().copied().collect::>(); @@ -2752,8 +2740,8 @@ def _format(operand): match weight { Some(NodeType::Operation(packed)) => { block_op_names.push(packed.op.name().to_string()); - block_qargs.extend(self.qargs_cache.intern(packed.qubits)); - block_cargs.extend(self.cargs_cache.intern(packed.clbits)); + block_qargs.extend(self.qargs_cache.get(packed.qubits)); + block_cargs.extend(self.cargs_cache.get(packed.clbits)); if let Some(condition) = packed.condition() { block_cargs.extend( @@ -2828,8 +2816,8 @@ def _format(operand): } let op_name = py_op.operation.name().to_string(); - let qubits = Interner::intern(&mut self.qargs_cache, block_qargs)?; - let clbits = Interner::intern(&mut self.cargs_cache, block_cargs)?; + let qubits = self.qargs_cache.insert_owned(block_qargs); + let clbits = self.cargs_cache.insert_owned(block_cargs); let weight = NodeType::Operation(PackedInstruction { op: py_op.operation, qubits, @@ -3182,7 +3170,7 @@ def _format(operand): "cannot propagate a condition to an element that already has one", )); } - let cargs = input_dag.cargs_cache.intern(inst.clbits); + let cargs = input_dag.cargs_cache.get(inst.clbits); let cargs_bits: Vec = input_dag .clbits .map_indices(cargs) @@ -3438,12 +3426,12 @@ def _format(operand): .collect(); let mut new_wires: HashSet = self .qargs_cache - .intern(old_packed.qubits) + .get(old_packed.qubits) .iter() .map(|x| Wire::Qubit(*x)) .chain( self.cargs_cache - .intern(old_packed.clbits) + .get(old_packed.clbits) .iter() .map(|x| Wire::Clbit(*x)), ) @@ -3966,7 +3954,7 @@ def _format(operand): continue; } - let qargs = self.qargs_cache.intern(packed.qubits); + let qargs = self.qargs_cache.get(packed.qubits); if qargs.len() == 2 { nodes.push(self.unpack_into(py, node, weight)?); } @@ -3984,7 +3972,7 @@ def _format(operand): continue; } - let qargs = self.qargs_cache.intern(packed.qubits); + let qargs = self.qargs_cache.get(packed.qubits); if qargs.len() >= 3 { nodes.push(self.unpack_into(py, node, weight)?); } @@ -4348,7 +4336,7 @@ def _format(operand): py, new_layer .qubits - .map_indices(new_layer.qargs_cache.intern(node.qubits)), + .map_indices(new_layer.qargs_cache.get(node.qubits)), ) }); let support_list = PyList::empty_bound(py); @@ -4381,7 +4369,7 @@ def _format(operand): let qubits = PyTuple::new_bound( py, self.qargs_cache - .intern(retrieved_node.qubits) + .get(retrieved_node.qubits) .iter() .map(|qubit| self.qubits.get(*qubit)), ) @@ -4685,7 +4673,7 @@ def _format(operand): if processed_non_directive_nodes.contains(&cur_index) { continue; } - 
qubits_in_cone.extend(self.qargs_cache.intern(packed.qubits).iter()); + qubits_in_cone.extend(self.qargs_cache.get(packed.qubits)); processed_non_directive_nodes.insert(cur_index); for pred_index in self.quantum_predecessors(cur_index) { @@ -4705,7 +4693,7 @@ def _format(operand): { if self .qargs_cache - .intern(pred_packed.qubits) + .get(pred_packed.qubits) .iter() .any(|x| qubits_in_cone.contains(x)) { @@ -5165,7 +5153,7 @@ impl DAGCircuit { let (all_cbits, vars): (Vec, Option>) = { if self.may_have_additional_wires(py, &instr) { let mut clbits: HashSet = - HashSet::from_iter(self.cargs_cache.intern(instr.clbits).iter().copied()); + HashSet::from_iter(self.cargs_cache.get(instr.clbits).iter().copied()); let (additional_clbits, additional_vars) = self.additional_wires(py, instr.op.view(), instr.condition())?; for clbit in additional_clbits { @@ -5173,7 +5161,7 @@ impl DAGCircuit { } (clbits.into_iter().collect(), Some(additional_vars)) } else { - (self.cargs_cache.intern(instr.clbits).to_vec(), None) + (self.cargs_cache.get(instr.clbits).to_vec(), None) } }; @@ -5186,7 +5174,7 @@ impl DAGCircuit { // and the output map. let output_nodes: HashSet = self .qargs_cache - .intern(qubits_id) + .get(qubits_id) .iter() .map(|q| self.qubit_io_map.get(q.0 as usize).map(|x| x[1]).unwrap()) .chain( @@ -5231,7 +5219,7 @@ impl DAGCircuit { let (all_cbits, vars): (Vec, Option>) = { if self.may_have_additional_wires(py, &inst) { let mut clbits: HashSet = - HashSet::from_iter(self.cargs_cache.intern(inst.clbits).iter().cloned()); + HashSet::from_iter(self.cargs_cache.get(inst.clbits).iter().copied()); let (additional_clbits, additional_vars) = self.additional_wires(py, inst.op.view(), inst.condition())?; for clbit in additional_clbits { @@ -5239,7 +5227,7 @@ impl DAGCircuit { } (clbits.into_iter().collect(), Some(additional_vars)) } else { - (self.cargs_cache.intern(inst.clbits).to_vec(), None) + (self.cargs_cache.get(inst.clbits).to_vec(), None) } }; @@ -5252,7 +5240,7 @@ impl DAGCircuit { // "first" nodes on each wire. let mut input_nodes: Vec = self .qargs_cache - .intern(qubits_id) + .get(qubits_id) .iter() .map(|q| self.qubit_io_map[q.0 as usize][0]) .chain(all_cbits.iter().map(|c| self.clbit_io_map[c.0 as usize][0])) @@ -5282,8 +5270,8 @@ impl DAGCircuit { fn sort_key(&self, node: NodeIndex) -> SortKeyType { match &self.dag[node] { NodeType::Operation(packed) => ( - self.qargs_cache.intern(packed.qubits).as_slice(), - self.cargs_cache.intern(packed.clbits).as_slice(), + self.qargs_cache.get(packed.qubits), + self.cargs_cache.get(packed.clbits), ), NodeType::QubitIn(q) => (std::slice::from_ref(q), &[Clbit(u32::MAX)]), NodeType::QubitOut(_q) => (&[Qubit(u32::MAX)], &[Clbit(u32::MAX)]), @@ -5687,18 +5675,16 @@ impl DAGCircuit { } } else if let Ok(op_node) = b.downcast::() { let op_node = op_node.borrow(); - let qubits = Interner::intern( - &mut self.qargs_cache, + let qubits = self.qargs_cache.insert_owned( self.qubits .map_bits(op_node.instruction.qubits.bind(py))? .collect(), - )?; - let clbits = Interner::intern( - &mut self.cargs_cache, + ); + let clbits = self.cargs_cache.insert_owned( self.clbits .map_bits(op_node.instruction.clbits.bind(py))? .collect(), - )?; + ); let params = (!op_node.instruction.params.is_empty()) .then(|| Box::new(op_node.instruction.params.clone())); let inst = PackedInstruction { @@ -5739,8 +5725,8 @@ impl DAGCircuit { )? 
.into_any(), NodeType::Operation(packed) => { - let qubits = self.qargs_cache.intern(packed.qubits); - let clbits = self.cargs_cache.intern(packed.clbits); + let qubits = self.qargs_cache.get(packed.qubits); + let clbits = self.cargs_cache.get(packed.clbits); Py::new( py, ( @@ -5955,18 +5941,18 @@ impl DAGCircuit { if let NodeType::Operation(ref mut new_inst) = new_node { let new_qubit_indices: Vec = other .qargs_cache - .intern(new_inst.qubits) + .get(new_inst.qubits) .iter() .map(|old_qubit| qubit_map[old_qubit]) .collect(); let new_clbit_indices: Vec = other .cargs_cache - .intern(new_inst.clbits) + .get(new_inst.clbits) .iter() .map(|old_clbit| clbit_map[old_clbit]) .collect(); - new_inst.qubits = Interner::intern(&mut self.qargs_cache, new_qubit_indices)?; - new_inst.clbits = Interner::intern(&mut self.cargs_cache, new_clbit_indices)?; + new_inst.qubits = self.qargs_cache.insert_owned(new_qubit_indices); + new_inst.clbits = self.cargs_cache.insert_owned(new_clbit_indices); self.increment_op(new_inst.op.name()); } let new_index = self.dag.add_node(new_node); @@ -6128,7 +6114,7 @@ impl DAGCircuit { self._check_condition(py, inst.op.name(), condition.bind(py))?; } - for b in self.qargs_cache.intern(inst.qubits) { + for b in self.qargs_cache.get(inst.qubits) { if self.qubit_io_map.len() - 1 < b.0 as usize { return Err(DAGCircuitError::new_err(format!( "qubit {} not found in output map", @@ -6137,7 +6123,7 @@ impl DAGCircuit { } } - for b in self.cargs_cache.intern(inst.clbits) { + for b in self.cargs_cache.get(inst.clbits) { if !self.clbit_io_map.len() - 1 < b.0 as usize { return Err(DAGCircuitError::new_err(format!( "clbit {} not found in output map", diff --git a/crates/circuit/src/interner.rs b/crates/circuit/src/interner.rs index e19f56e87a7d..25134bd9ea2e 100644 --- a/crates/circuit/src/interner.rs +++ b/crates/circuit/src/interner.rs @@ -10,79 +10,149 @@ // copyright notice, and modified files need to carry a notice indicating // that they have been altered from the originals. +use std::borrow::Borrow; +use std::fmt; use std::hash::Hash; -use std::sync::Arc; +use std::marker::PhantomData; -use hashbrown::HashMap; -use pyo3::exceptions::PyRuntimeError; -use pyo3::prelude::*; +use indexmap::IndexSet; -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct Index(u32); - -impl IntoPy for Index { - fn into_py(self, py: Python<'_>) -> PyObject { - self.0.into_py(py) - } +/// A key to retrieve a value (by reference) from an interner of the same type. This is narrower +/// than a true reference, at the cost that it is explicitly not lifetime bound to the interner it +/// came from; it is up to the user to ensure that they never attempt to query an interner with a +/// key from a different interner. +#[derive(Debug, Eq, PartialEq)] +pub struct Interned { + index: u32, + // `Interned` is like a non-lifetime-bound reference to data stored in the interner. Storing + // this adds a small amount more type safety to the interner keys when there's several interners + // in play close to each other. + _type: PhantomData<*const T>, } - -/// An append-only data structure for interning generic -/// Rust types. -#[derive(Clone, Debug)] -pub struct IndexedInterner { - entries: Vec>, - index_lookup: HashMap, Index>, +// The `PhantomData` marker prevents various useful things from being derived (for `Clone` and +// `Copy` it's an awkward effect of the derivation system), so we have manual implementations. 
+impl Clone for Interned { + fn clone(&self) -> Self { + *self + } } +impl Copy for Interned {} +unsafe impl Send for Interned {} +unsafe impl Sync for Interned {} -pub trait Interner { - type Output; +/// An append-only data structure for interning generic Rust types. +/// +/// The interner can lookup keys using a reference type, and will create the corresponding owned +/// allocation on demand, if a matching entry is not already stored. It returns manual keys into +/// itself (the `Interned` type), rather than raw references; the `Interned` type is narrower than a +/// true reference. +/// +/// # Examples +/// +/// ```rust +/// let mut interner = Interner::<[usize]>::new(); +/// +/// // These are of type `Interned<[usize]>`. +/// let empty = interner.insert(&[]); +/// let other_empty = interner.insert(&[]); +/// let key = interner.insert(&[0, 1, 2, 3, 4]); +/// +/// assert_eq!(empty, other_empty); +/// assert_ne!(empty, key); +/// +/// assert_eq!(interner.get(empty), &[]); +/// assert_eq!(interner.get(key), &[0, 1, 2, 3, 4]); +/// ``` +#[derive(Default)] +pub struct Interner(IndexSet<::Owned, ::ahash::RandomState>); - /// Takes ownership of the provided key and returns the interned - /// type. - fn intern(self, value: K) -> Self::Output; +// `Clone` and `Debug` can't use the derivation mechanism because the values that are actually +// stored are of type `::Owned`, which the derive system doesn't reason about. +impl Clone for Interner +where + T: ?Sized + ToOwned, + ::Owned: Clone, +{ + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} +impl fmt::Debug for Interner +where + T: ?Sized + ToOwned, + ::Owned: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + f.debug_tuple("Interner").field(&self.0).finish() + } } -impl<'a, T> Interner for &'a IndexedInterner { - type Output = &'a T; - - fn intern(self, index: Index) -> Self::Output { - let value = self.entries.get(index.0 as usize).unwrap(); - value.as_ref() +impl Interner +where + T: ?Sized + ToOwned, +{ + pub fn new() -> Self { + Self(Default::default()) } } -impl<'a, T> Interner for &'a mut IndexedInterner +impl Interner where - T: Eq + Hash, + T: ?Sized + ToOwned, + ::Owned: Hash + Eq, { - type Output = PyResult; + /// Retrieve a reference to the stored value for this key. + pub fn get(&self, index: Interned) -> &T { + self.0 + .get_index(index.index as usize) + .expect( + "the caller is responsible for only using interner keys from the correct interner", + ) + .borrow() + } - fn intern(self, key: T) -> Self::Output { - if let Some(index) = self.index_lookup.get(&key).copied() { - Ok(index) - } else { - let args = Arc::new(key); - let index: Index = Index(self.entries.len().try_into().map_err(|_| { - PyRuntimeError::new_err("The interner has run out of indices (cache is full)!") - })?); - self.entries.push(args.clone()); - self.index_lookup.insert_unique_unchecked(args, index); - Ok(index) + fn insert_new(&mut self, value: ::Owned) -> u32 { + let index = self.0.len(); + if index == u32::MAX as usize { + panic!("interner is out of space"); } + let _inserted = self.0.insert(value); + debug_assert!(_inserted); + index as u32 } -} -impl IndexedInterner { - pub fn new() -> Self { - IndexedInterner { - entries: Vec::new(), - index_lookup: HashMap::new(), + /// Get an interner key corresponding to the given referenced type. If not already stored, this + /// function will allocate a new owned value to use as the storage. 
+ /// + /// If you already have an owned value, use `insert_owned`, but in general this function will be + /// more efficient *unless* you already had the value for other reasons. + pub fn insert(&mut self, value: &T) -> Interned + where + T: Hash + Eq, + { + let index = match self.0.get_index_of(value) { + Some(index) => index as u32, + None => self.insert_new(value.to_owned()), + }; + Interned { + index, + _type: PhantomData, } } -} -impl Default for IndexedInterner { - fn default() -> Self { - Self::new() + /// Get an interner key corresponding to the given owned type. If not already stored, the value + /// will be used as the key, otherwise it will be dropped. + /// + /// If you don't already have the owned value, use `insert`; this will only allocate if the + /// lookup fails. + pub fn insert_owned(&mut self, value: ::Owned) -> Interned { + let index = match self.0.get_index_of(&value) { + Some(index) => index as u32, + None => self.insert_new(value), + }; + Interned { + index, + _type: PhantomData, + } } } diff --git a/crates/circuit/src/packed_instruction.rs b/crates/circuit/src/packed_instruction.rs index 7619ecb9c525..77ca0c6c02dd 100644 --- a/crates/circuit/src/packed_instruction.rs +++ b/crates/circuit/src/packed_instruction.rs @@ -25,9 +25,11 @@ use smallvec::SmallVec; use crate::circuit_data::CircuitData; use crate::circuit_instruction::ExtraInstructionAttributes; use crate::imports::{get_std_gate_class, DEEPCOPY}; +use crate::interner::Interned; use crate::operations::{ Operation, OperationRef, Param, PyGate, PyInstruction, PyOperation, StandardGate, }; +use crate::{Clbit, Qubit}; /// The logical discriminant of `PackedOperation`. #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -491,9 +493,9 @@ impl Drop for PackedOperation { pub struct PackedInstruction { pub op: PackedOperation, /// The index under which the interner has stored `qubits`. - pub qubits: crate::interner::Index, + pub qubits: Interned<[Qubit]>, /// The index under which the interner has stored `clbits`. - pub clbits: crate::interner::Index, + pub clbits: Interned<[Clbit]>, pub params: Option>>, pub extra_attrs: Option>,
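
For code that consumes `PackedInstruction`s, the net effect of the key type
change is that each instruction's bits are resolved through the owning interner
and come back as plain slices.  A minimal sketch of that consuming side
(hypothetical; `circuit` stands in for a `CircuitData`, and the direct field
access assumes code living inside the `circuit` crate):

    // Count two-qubit operations by resolving each instruction's interner key.
    let mut two_qubit_ops = 0;
    for inst in circuit.data.iter() {
        // `inst.qubits` is an `Interned<[Qubit]>`; `get` returns `&[Qubit]`.
        let qargs: &[Qubit] = circuit.qargs_interner.get(inst.qubits);
        if qargs.len() == 2 {
            two_qubit_ops += 1;
        }
    }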