From 610cf94ce7f18cb6321d660d0560f45b7de95d61 Mon Sep 17 00:00:00 2001 From: Matthew Treinish Date: Fri, 8 Nov 2024 04:59:01 -0500 Subject: [PATCH 1/3] Use OnceLock instead of OnceCell OnceLock is a thread-safe version of OnceCell that enables us to use PackedInstruction from a threaded environment. There is some overhead associated with this, primarily in memory as the OnceLock is a larger type than a OnceCell. But the tradeoff is worth it to start leverage multithreading for circuits. Fixes #13219 --- crates/accelerate/src/unitary_synthesis.rs | 4 ++-- crates/circuit/src/circuit_data.rs | 12 ++++++------ crates/circuit/src/circuit_instruction.rs | 6 +++--- crates/circuit/src/converters.rs | 4 ++-- crates/circuit/src/dag_circuit.rs | 18 +++++++++--------- crates/circuit/src/dag_node.rs | 6 +++--- crates/circuit/src/packed_instruction.rs | 12 ++++++------ 7 files changed, 31 insertions(+), 31 deletions(-) diff --git a/crates/accelerate/src/unitary_synthesis.rs b/crates/accelerate/src/unitary_synthesis.rs index b5ef66db4f1e..f23bd0aaeece 100644 --- a/crates/accelerate/src/unitary_synthesis.rs +++ b/crates/accelerate/src/unitary_synthesis.rs @@ -12,7 +12,7 @@ #![allow(clippy::too_many_arguments)] #[cfg(feature = "cache_pygates")] -use std::cell::OnceCell; +use std::sync::OnceLock; use std::f64::consts::PI; use approx::relative_eq; @@ -149,7 +149,7 @@ fn apply_synth_sequence( params: new_params, extra_attrs: ExtraInstructionAttributes::default(), #[cfg(feature = "cache_pygates")] - py_op: OnceCell::new(), + py_op: OnceLock::new(), }; instructions.push(instruction); } diff --git a/crates/circuit/src/circuit_data.rs b/crates/circuit/src/circuit_data.rs index f680d96e47a8..365b1b5ad523 100644 --- a/crates/circuit/src/circuit_data.rs +++ b/crates/circuit/src/circuit_data.rs @@ -11,7 +11,7 @@ // that they have been altered from the originals. #[cfg(feature = "cache_pygates")] -use std::cell::OnceCell; +use std::sync::OnceLock; use crate::bit_data::BitData; use crate::circuit_instruction::{ @@ -302,7 +302,7 @@ impl CircuitData { params: inst.params.clone(), extra_attrs: inst.extra_attrs.clone(), #[cfg(feature = "cache_pygates")] - py_op: OnceCell::new(), + py_op: OnceLock::new(), }); } } else if copy_instructions { @@ -314,7 +314,7 @@ impl CircuitData { params: inst.params.clone(), extra_attrs: inst.extra_attrs.clone(), #[cfg(feature = "cache_pygates")] - py_op: OnceCell::new(), + py_op: OnceLock::new(), }); } } else { @@ -939,7 +939,7 @@ impl CircuitData { params, extra_attrs: ExtraInstructionAttributes::default(), #[cfg(feature = "cache_pygates")] - py_op: OnceCell::new(), + py_op: OnceLock::new(), }); res.track_instruction_parameters(py, res.data.len() - 1)?; } @@ -1048,7 +1048,7 @@ impl CircuitData { params, extra_attrs: ExtraInstructionAttributes::default(), #[cfg(feature = "cache_pygates")] - py_op: OnceCell::new(), + py_op: OnceLock::new(), }); res.track_instruction_parameters(py, res.data.len() - 1)?; } @@ -1106,7 +1106,7 @@ impl CircuitData { params, extra_attrs: ExtraInstructionAttributes::default(), #[cfg(feature = "cache_pygates")] - py_op: OnceCell::new(), + py_op: OnceLock::new(), }); Ok(()) } diff --git a/crates/circuit/src/circuit_instruction.rs b/crates/circuit/src/circuit_instruction.rs index e1bf3cfc2e55..e449ad660e38 100644 --- a/crates/circuit/src/circuit_instruction.rs +++ b/crates/circuit/src/circuit_instruction.rs @@ -11,7 +11,7 @@ // that they have been altered from the originals. #[cfg(feature = "cache_pygates")] -use std::cell::OnceCell; +use std::sync::OnceLock; use numpy::IntoPyArray; use pyo3::basic::CompareOp; @@ -236,7 +236,7 @@ pub struct CircuitInstruction { pub params: SmallVec<[Param; 3]>, pub extra_attrs: ExtraInstructionAttributes, #[cfg(feature = "cache_pygates")] - pub py_op: OnceCell>, + pub py_op: OnceLock>, } impl CircuitInstruction { @@ -301,7 +301,7 @@ impl CircuitInstruction { params, extra_attrs: ExtraInstructionAttributes::new(label, None, None, None), #[cfg(feature = "cache_pygates")] - py_op: OnceCell::new(), + py_op: OnceLock::new(), }) } diff --git a/crates/circuit/src/converters.rs b/crates/circuit/src/converters.rs index dea366d02ff6..030a582bdad7 100644 --- a/crates/circuit/src/converters.rs +++ b/crates/circuit/src/converters.rs @@ -11,7 +11,7 @@ // that they have been altered from the originals. #[cfg(feature = "cache_pygates")] -use std::cell::OnceCell; +use std::sync::OnceLock; use ::pyo3::prelude::*; use hashbrown::HashMap; @@ -126,7 +126,7 @@ pub fn dag_to_circuit( )), extra_attrs: instr.extra_attrs.clone(), #[cfg(feature = "cache_pygates")] - py_op: OnceCell::new(), + py_op: OnceLock::new(), }) } else { Ok(instr.clone()) diff --git a/crates/circuit/src/dag_circuit.rs b/crates/circuit/src/dag_circuit.rs index 10551963b6e1..00f9d77d6282 100644 --- a/crates/circuit/src/dag_circuit.rs +++ b/crates/circuit/src/dag_circuit.rs @@ -68,7 +68,7 @@ use std::convert::Infallible; use std::f64::consts::PI; #[cfg(feature = "cache_pygates")] -use std::cell::OnceCell; +use std::sync::OnceLock; static CONTROL_FLOW_OP_NAMES: [&str; 4] = ["for_loop", "while_loop", "if_else", "switch_case"]; static SEMANTIC_EQ_SYMMETRIC: [&str; 4] = ["barrier", "swap", "break_loop", "continue_loop"]; @@ -5147,7 +5147,7 @@ impl DAGCircuit { let py_op = if let Some(py_op) = py_op { py_op.into() } else { - OnceCell::new() + OnceLock::new() }; let packed_instruction = PackedInstruction { op, @@ -6193,7 +6193,7 @@ impl DAGCircuit { .then(|| Box::new(new_gate.1.iter().map(|x| Param::Float(*x)).collect())), extra_attrs: ExtraInstructionAttributes::default(), #[cfg(feature = "cache_pygates")] - py_op: OnceCell::new(), + py_op: OnceLock::new(), } } else { panic!("This method only works if provided index is an op node"); @@ -6276,12 +6276,12 @@ impl DAGCircuit { }; #[cfg(feature = "cache_pygates")] let py_op = match new_op.operation.view() { - OperationRef::Standard(_) => OnceCell::new(), - OperationRef::Gate(gate) => OnceCell::from(gate.gate.clone_ref(py)), + OperationRef::Standard(_) => OnceLock::new(), + OperationRef::Gate(gate) => OnceLock::from(gate.gate.clone_ref(py)), OperationRef::Instruction(instruction) => { - OnceCell::from(instruction.instruction.clone_ref(py)) + OnceLock::from(instruction.instruction.clone_ref(py)) } - OperationRef::Operation(op) => OnceCell::from(op.operation.clone_ref(py)), + OperationRef::Operation(op) => OnceLock::from(op.operation.clone_ref(py)), }; let inst = PackedInstruction { op: new_op.operation, @@ -6732,7 +6732,7 @@ impl DAGCircuit { params: instr.params.clone(), extra_attrs: instr.extra_attrs.clone(), #[cfg(feature = "cache_pygates")] - py_op: OnceCell::new(), + py_op: OnceLock::new(), }) }) .collect::>>()?; @@ -6994,7 +6994,7 @@ impl DAGCircuit { params: (!new_op.params.is_empty()).then(|| new_op.params.into()), extra_attrs, #[cfg(feature = "cache_pygates")] - py_op: py_op_cache.map(OnceCell::from).unwrap_or_default(), + py_op: py_op_cache.map(OnceLock::from).unwrap_or_default(), }); if let Some(weight) = self.dag.node_weight_mut(node_index) { *weight = new_weight; diff --git a/crates/circuit/src/dag_node.rs b/crates/circuit/src/dag_node.rs index 6c3a2d15fbad..f20da74188e1 100644 --- a/crates/circuit/src/dag_node.rs +++ b/crates/circuit/src/dag_node.rs @@ -11,7 +11,7 @@ // that they have been altered from the originals. #[cfg(feature = "cache_pygates")] -use std::cell::OnceCell; +use std::sync::OnceLock; use std::hash::Hasher; use crate::circuit_instruction::{CircuitInstruction, OperationFromPython}; @@ -241,7 +241,7 @@ impl DAGOpNode { instruction.operation = instruction.operation.py_deepcopy(py, None)?; #[cfg(feature = "cache_pygates")] { - instruction.py_op = OnceCell::new(); + instruction.py_op = OnceLock::new(); } } let base = PyClassInitializer::from(DAGNode { node: None }); @@ -293,7 +293,7 @@ impl DAGOpNode { params: self.instruction.params.clone(), extra_attrs: self.instruction.extra_attrs.clone(), #[cfg(feature = "cache_pygates")] - py_op: OnceCell::new(), + py_op: OnceLock::new(), }) } diff --git a/crates/circuit/src/packed_instruction.rs b/crates/circuit/src/packed_instruction.rs index af72b3226a7c..449c7eeecf8a 100644 --- a/crates/circuit/src/packed_instruction.rs +++ b/crates/circuit/src/packed_instruction.rs @@ -11,7 +11,7 @@ // that they have been altered from the originals. #[cfg(feature = "cache_pygates")] -use std::cell::OnceCell; +use std::sync::OnceLock; use std::ptr::NonNull; use pyo3::intern; @@ -504,17 +504,17 @@ pub struct PackedInstruction { pub extra_attrs: ExtraInstructionAttributes, #[cfg(feature = "cache_pygates")] - /// This is hidden in a `OnceCell` because it's just an on-demand cache; we don't create this - /// unless asked for it. A `OnceCell` of a non-null pointer type (like `Py`) is the same + /// This is hidden in a `OnceLock` because it's just an on-demand cache; we don't create this + /// unless asked for it. A `OnceLock` of a non-null pointer type (like `Py`) is the same /// size as a pointer and there are no runtime checks on access beyond the initialisation check, /// which is a simple null-pointer check. /// - /// WARNING: remember that `OnceCell`'s `get_or_init` method is no-reentrant, so the initialiser + /// WARNING: remember that `OnceLock`'s `get_or_init` method is no-reentrant, so the initialiser /// must not yield the GIL to Python space. We avoid using `GILOnceCell` here because it /// requires the GIL to even `get` (of course!), which makes implementing `Clone` hard for us. /// We can revisit once we're on PyO3 0.22+ and have been able to disable its `py-clone` /// feature. - pub py_op: OnceCell>, + pub py_op: OnceLock>, } impl PackedInstruction { @@ -581,7 +581,7 @@ impl PackedInstruction { } }; - // `OnceCell::get_or_init` and the non-stabilised `get_or_try_init`, which would otherwise + // `OnceLock::get_or_init` and the non-stabilised `get_or_try_init`, which would otherwise // be nice here are both non-reentrant. This is a problem if the init yields control to the // Python interpreter as this one does, since that can allow CPython to freeze the thread // and for another to attempt the initialisation. From 33ef7ebd8cb2f80cc75bd97bfc380ac78d9fd284 Mon Sep 17 00:00:00 2001 From: Matthew Treinish Date: Fri, 8 Nov 2024 07:42:01 -0500 Subject: [PATCH 2/3] Update twirling too --- crates/accelerate/src/twirling.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/accelerate/src/twirling.rs b/crates/accelerate/src/twirling.rs index 0867bc161555..29c9da3671bc 100644 --- a/crates/accelerate/src/twirling.rs +++ b/crates/accelerate/src/twirling.rs @@ -209,7 +209,7 @@ fn twirl_gate( params: None, extra_attrs: ExtraInstructionAttributes::new(None, None, None, None), #[cfg(feature = "cache_pygates")] - py_op: std::cell::OnceCell::new(), + py_op: std::sync::OnceLock::new(), }, )?; out_circ.push( @@ -221,7 +221,7 @@ fn twirl_gate( params: None, extra_attrs: ExtraInstructionAttributes::new(None, None, None, None), #[cfg(feature = "cache_pygates")] - py_op: std::cell::OnceCell::new(), + py_op: std::sync::OnceLock::new(), }, )?; @@ -235,7 +235,7 @@ fn twirl_gate( params: None, extra_attrs: ExtraInstructionAttributes::new(None, None, None, None), #[cfg(feature = "cache_pygates")] - py_op: std::cell::OnceCell::new(), + py_op: std::sync::OnceLock::new(), }, )?; out_circ.push( @@ -247,7 +247,7 @@ fn twirl_gate( params: None, extra_attrs: ExtraInstructionAttributes::new(None, None, None, None), #[cfg(feature = "cache_pygates")] - py_op: std::cell::OnceCell::new(), + py_op: std::sync::OnceLock::new(), }, )?; @@ -361,7 +361,7 @@ fn generate_twirled_circuit( )), extra_attrs: inst.extra_attrs.clone(), #[cfg(feature = "cache_pygates")] - py_op: std::cell::OnceCell::new(), + py_op: std::sync::OnceLock::new(), }; #[cfg(feature = "cache_pygates")] new_inst.py_op.set(new_inst_obj).unwrap(); From 6481eab43865297bf48377fa4aa6197f7e88a0cc Mon Sep 17 00:00:00 2001 From: Matthew Treinish Date: Fri, 8 Nov 2024 07:38:44 -0500 Subject: [PATCH 3/3] Perform BarrierBeforeFinalMeasurements analysis in paralle With #13410 removing the non-threadsafe structure from our circuit representation we're now able to read and iterate over a DAGCircuit from multiple threads. This commit is the first small piece doing this, it moves the analysis portion of the BarrierBeforeFinalMeasurements pass to execure in parallel. The pass checks every node to ensure all it's decendents are either a measure or a barrier before reaching the end of the circuit. This commit iterates over all the nodes and does the check in parallel. --- crates/accelerate/src/barrier_before_final_measurement.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/accelerate/src/barrier_before_final_measurement.rs b/crates/accelerate/src/barrier_before_final_measurement.rs index b42be53f231c..245bc7070c7b 100644 --- a/crates/accelerate/src/barrier_before_final_measurement.rs +++ b/crates/accelerate/src/barrier_before_final_measurement.rs @@ -12,6 +12,7 @@ use hashbrown::HashSet; use pyo3::prelude::*; +use rayon::prelude::*; use rustworkx_core::petgraph::stable_graph::NodeIndex; use qiskit_circuit::circuit_instruction::ExtraInstructionAttributes; @@ -30,8 +31,8 @@ pub fn barrier_before_final_measurements( dag: &mut DAGCircuit, label: Option, ) -> PyResult<()> { - let final_ops: HashSet = dag - .op_nodes(true) + let node_indices: Vec = dag.op_nodes(true).collect(); + let final_ops: HashSet = node_indices.into_par_iter() .filter(|node| { let NodeType::Operation(ref inst) = dag.dag()[*node] else { unreachable!();