diff --git a/crates/accelerate/Cargo.toml b/crates/accelerate/Cargo.toml index cc624e7750d..0a3c295474a 100644 --- a/crates/accelerate/Cargo.toml +++ b/crates/accelerate/Cargo.toml @@ -9,6 +9,10 @@ license.workspace = true name = "qiskit_accelerate" doctest = false + +[features] +cache_pygates = ["qiskit-circuit/cache_pygates"] + [dependencies] rayon.workspace = true numpy.workspace = true @@ -60,6 +64,3 @@ features = ["ndarray"] [dependencies.pulp] version = "0.18.22" features = ["macro"] - -[features] -cache_pygates = ["qiskit-circuit/cache_pygates"] diff --git a/crates/accelerate/src/consolidate_blocks.rs b/crates/accelerate/src/consolidate_blocks.rs index 1edd592ce87..46c648af75c 100644 --- a/crates/accelerate/src/consolidate_blocks.rs +++ b/crates/accelerate/src/consolidate_blocks.rs @@ -107,7 +107,7 @@ pub(crate) fn consolidate_blocks( dag.get_qargs(inst.qubits), ) { all_block_gates.insert(inst_node); - let matrix = match get_matrix_from_inst(py, inst) { + let matrix = match get_matrix_from_inst(inst) { Ok(mat) => mat, Err(_) => continue, }; @@ -198,7 +198,7 @@ pub(crate) fn consolidate_blocks( *block_qargs.iter().min().unwrap(), *block_qargs.iter().max().unwrap(), ]; - let matrix = blocks_to_matrix(py, dag, &block, block_index_map).ok(); + let matrix = blocks_to_matrix(dag, &block, block_index_map).ok(); if let Some(matrix) = matrix { if force_consolidate || decomposer.num_basis_gates_inner(matrix.view()) < basis_count @@ -252,7 +252,7 @@ pub(crate) fn consolidate_blocks( first_qubits, ) { - let matrix = match get_matrix_from_inst(py, first_inst) { + let matrix = match get_matrix_from_inst(first_inst) { Ok(mat) => mat, Err(_) => continue, }; @@ -272,7 +272,7 @@ pub(crate) fn consolidate_blocks( already_in_block = true; } let gate = dag.dag()[*node].unwrap_operation(); - let operator = match get_matrix_from_inst(py, gate) { + let operator = match get_matrix_from_inst(gate) { Ok(mat) => mat, Err(_) => { // Set this to skip this run because we can't compute the matrix of the diff --git a/crates/accelerate/src/convert_2q_block_matrix.rs b/crates/accelerate/src/convert_2q_block_matrix.rs index aefc5976e82..c79bd655dce 100644 --- a/crates/accelerate/src/convert_2q_block_matrix.rs +++ b/crates/accelerate/src/convert_2q_block_matrix.rs @@ -31,7 +31,7 @@ use crate::euler_one_qubit_decomposer::matmul_1q; use crate::QiskitError; #[inline] -pub fn get_matrix_from_inst(py: Python, inst: &PackedInstruction) -> PyResult> { +pub fn get_matrix_from_inst(inst: &PackedInstruction) -> PyResult> { if let Some(mat) = inst.op.matrix(inst.params_view()) { Ok(mat) } else if inst.op.try_standard_gate().is_some() { @@ -39,13 +39,15 @@ pub fn get_matrix_from_inst(py: Python, inst: &PackedInstruction) -> PyResult>()? - .as_array() - .to_owned()) + Python::with_gil(|py| { + Ok(QI_OPERATOR + .get_bound(py) + .call1((gate.gate.clone_ref(py),))? + .getattr(intern!(py, "data"))? + .extract::>()? + .as_array() + .to_owned()) + }) } else { Err(QiskitError::new_err( "Can't compute matrix of non-unitary op", @@ -55,7 +57,6 @@ pub fn get_matrix_from_inst(py: Python, inst: &PackedInstruction) -> PyResult> = None; for node in op_list { let inst = dag.dag()[*node].unwrap_operation(); - let op_matrix = get_matrix_from_inst(py, inst)?; + let op_matrix = get_matrix_from_inst(inst)?; match dag .get_qargs(inst.qubits) .iter() diff --git a/crates/accelerate/src/lib.rs b/crates/accelerate/src/lib.rs index 45cf047a680..eab307859c4 100644 --- a/crates/accelerate/src/lib.rs +++ b/crates/accelerate/src/lib.rs @@ -56,6 +56,7 @@ pub mod synthesis; pub mod target_transpiler; pub mod twirling; pub mod two_qubit_decompose; +pub mod two_qubit_peephole; pub mod uc_gate; pub mod unitary_synthesis; pub mod utils; diff --git a/crates/accelerate/src/two_qubit_decompose.rs b/crates/accelerate/src/two_qubit_decompose.rs index 4410d6f35e0..d0217dd5ff8 100644 --- a/crates/accelerate/src/two_qubit_decompose.rs +++ b/crates/accelerate/src/two_qubit_decompose.rs @@ -1244,9 +1244,9 @@ type TwoQubitSequenceVec = Vec<(Option, SmallVec<[f64; 3]>, SmallV #[derive(Clone, Debug)] #[pyclass(sequence)] pub struct TwoQubitGateSequence { - gates: TwoQubitSequenceVec, + pub gates: TwoQubitSequenceVec, #[pyo3(get)] - global_phase: f64, + pub global_phase: f64, } impl TwoQubitGateSequence { @@ -1709,7 +1709,7 @@ impl TwoQubitBasisDecomposer { gate: String, gate_matrix: ArrayView2, basis_fidelity: f64, - euler_basis: &str, + euler_basis: EulerBasis, pulse_optimize: Option, ) -> PyResult { let ipz: ArrayView2 = aview2(&IPZ); @@ -1817,7 +1817,7 @@ impl TwoQubitBasisDecomposer { Ok(TwoQubitBasisDecomposer { gate, basis_fidelity, - euler_basis: EulerBasis::__new__(euler_basis)?, + euler_basis, pulse_optimize, basis_decomposer, super_controlled, @@ -1986,7 +1986,7 @@ impl TwoQubitBasisDecomposer { gate, gate_matrix.as_array(), basis_fidelity, - euler_basis, + EulerBasis::__new__(euler_basis)?, pulse_optimize, ) } @@ -2284,8 +2284,13 @@ fn two_qubit_decompose_up_to_diagonal( let (su4, phase) = u4_to_su4(mat_arr); let mut real_map = real_trace_transform(su4.view()); let mapped_su4 = real_map.dot(&su4.view()); - let decomp = - TwoQubitBasisDecomposer::new_inner("cx".to_string(), aview2(&CX_GATE), 1.0, "U", None)?; + let decomp = TwoQubitBasisDecomposer::new_inner( + "cx".to_string(), + aview2(&CX_GATE), + 1.0, + EulerBasis::__new__("U")?, + None, + )?; let circ_seq = decomp.call_inner(mapped_su4.view(), None, true, None)?; let circ = CircuitData::from_standard_gates( diff --git a/crates/accelerate/src/two_qubit_peephole.rs b/crates/accelerate/src/two_qubit_peephole.rs new file mode 100644 index 00000000000..cf852fbbf99 --- /dev/null +++ b/crates/accelerate/src/two_qubit_peephole.rs @@ -0,0 +1,428 @@ +// This code is part of Qiskit. +// +// (C) Copyright IBM 2024 +// +// This code is licensed under the Apache License, Version 2.0. You may +// obtain a copy of this license in the LICENSE.txt file in the root directory +// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +// +// Any modifications or derivative works of this code must retain this +// copyright notice, and modified files need to carry a notice indicating +// that they have been altered from the originals. + +use std::cmp::Ordering; +use std::sync::Mutex; + +use hashbrown::{HashMap, HashSet}; +use pyo3::prelude::*; +use rayon::prelude::*; +use rustworkx_core::petgraph::stable_graph::NodeIndex; +use smallvec::{smallvec, SmallVec}; + +use qiskit_circuit::circuit_instruction::ExtraInstructionAttributes; +use qiskit_circuit::dag_circuit::{DAGCircuit, NodeType}; +use qiskit_circuit::operations::{Operation, OperationRef, Param, StandardGate}; +use qiskit_circuit::packed_instruction::PackedOperation; +use qiskit_circuit::Qubit; + +use crate::convert_2q_block_matrix::blocks_to_matrix; +use crate::euler_one_qubit_decomposer::{ + EulerBasis, EulerBasisSet, EULER_BASES, EULER_BASIS_NAMES, +}; +use crate::nlayout::PhysicalQubit; +use crate::target_transpiler::Target; +use crate::two_qubit_decompose::{TwoQubitBasisDecomposer, TwoQubitGateSequence}; + +fn get_decomposers_from_target( + target: &Target, + qubits: &[Qubit], + fidelity: f64, +) -> PyResult> { + let physical_qubits = smallvec![PhysicalQubit(qubits[0].0), PhysicalQubit(qubits[1].0)]; + let gate_names = match target.operation_names_for_qargs(Some(&physical_qubits)) { + Ok(names) => names, + Err(_) => { + let reverse_qubits = physical_qubits.iter().rev().copied().collect(); + target + .operation_names_for_qargs(Some(&reverse_qubits)) + .unwrap() + } + }; + + let available_kak_gate: Vec<(&str, &PackedOperation, &[Param])> = gate_names + .iter() + .filter_map(|name| match target.operation_from_name(name) { + Ok(raw_op) => match raw_op.operation.view() { + OperationRef::Standard(_) | OperationRef::Gate(_) => { + Some((*name, &raw_op.operation, raw_op.params.as_slice())) + } + _ => None, + }, + Err(_) => None, + }) + .collect(); + + let single_qubit_basis_list = + target.operation_names_for_qargs(Some(&smallvec![physical_qubits[0]])); + let mut target_basis_set = EulerBasisSet::new(); + match single_qubit_basis_list { + Ok(basis_list) => { + EULER_BASES + .iter() + .enumerate() + .filter_map(|(idx, gates)| { + if !gates.iter().all(|gate| basis_list.contains(gate)) { + return None; + } + let basis = EULER_BASIS_NAMES[idx]; + Some(basis) + }) + .for_each(|basis| target_basis_set.add_basis(basis)); + } + Err(_) => target_basis_set.support_all(), + } + if target_basis_set.basis_supported(EulerBasis::U3) + && target_basis_set.basis_supported(EulerBasis::U321) + { + target_basis_set.remove(EulerBasis::U3); + } + if target_basis_set.basis_supported(EulerBasis::ZSX) + && target_basis_set.basis_supported(EulerBasis::ZSXX) + { + target_basis_set.remove(EulerBasis::ZSX); + } + + let euler_bases: Vec = target_basis_set.get_bases().collect(); + + available_kak_gate + .iter() + .filter_map(|(two_qubit_name, two_qubit_gate, params)| { + let matrix = two_qubit_gate.matrix(params); + matrix.map(|matrix| { + euler_bases.iter().map(move |euler_basis| { + TwoQubitBasisDecomposer::new_inner( + two_qubit_name.to_string(), + matrix.view(), + fidelity, + *euler_basis, + None, + ) + }) + }) + }) + .flatten() + .collect() +} + +#[inline] +fn score_sequence<'a>( + target: &'a Target, + kak_gate_name: &str, + sequence: impl Iterator, SmallVec<[Qubit; 2]>)> + 'a, +) -> f64 { + 1. - sequence + .map(|(gate, local_qubits)| { + let qubits = local_qubits + .iter() + .map(|qubit| PhysicalQubit(qubit.0)) + .collect::>(); + let name = match gate.as_ref() { + Some(g) => g.name(), + None => kak_gate_name, + }; + 1. - target.get_error(name, qubits.as_slice()).unwrap_or(0.) + }) + .product::() +} + +type MappingIterItem = Option<((TwoQubitGateSequence, String), [Qubit; 2])>; + +/// This transpiler pass can only run in a context where we've translated the circuit gates (or +/// where we know all gates have a matrix). If any gate identified in the run fails to have a +/// matrix defined (either in rust or python) it will be skipped +#[pyfunction] +pub(crate) fn two_qubit_unitary_peephole_optimize( + py: Python, + dag: &DAGCircuit, + target: &Target, + fidelity: f64, +) -> PyResult { + let runs: Vec> = dag.collect_2q_runs().unwrap(); + let node_mapping: HashMap = + HashMap::with_capacity(runs.iter().map(|run| run.len()).sum()); + let locked_node_mapping = Mutex::new(node_mapping); + + // Build a vec of all the best synthesized two qubit gate sequences from the collected runs. + // This is done in parallel + let run_mapping: PyResult> = runs + .par_iter() + .enumerate() + .map(|(run_index, node_indices)| { + let block_qubit_map = node_indices + .iter() + .find_map(|node_index| { + let inst = dag.dag()[*node_index].unwrap_operation(); + let qubits = dag.get_qargs(inst.qubits); + if qubits.len() == 2 { + if qubits[0] > qubits[1] { + Some([qubits[1], qubits[0]]) + } else { + Some([qubits[0], qubits[1]]) + } + } else { + None + } + }) + .unwrap(); + let matrix = blocks_to_matrix(dag, node_indices, block_qubit_map)?; + let decomposers = get_decomposers_from_target(target, &block_qubit_map, fidelity)?; + let mut decomposer_scores: Vec> = vec![None; decomposers.len()]; + + let order_sequence = + |(index_a, sequence_a): &(usize, (TwoQubitGateSequence, String)), + (index_b, sequence_b): &(usize, (TwoQubitGateSequence, String))| { + let score_a = match decomposer_scores[*index_a] { + Some(score) => score, + None => { + let score: f64 = + score_sequence( + target, + sequence_a.1.as_str(), + sequence_a.0.gates.iter().map( + |(gate, _params, local_qubits)| { + let qubits = local_qubits + .iter() + .map(|qubit| block_qubit_map[*qubit as usize]) + .collect(); + (*gate, qubits) + }, + ), + ); + decomposer_scores[*index_a] = Some(score); + score + } + }; + + let score_b = match decomposer_scores[*index_b] { + Some(score) => score, + None => { + let score: f64 = + score_sequence( + target, + sequence_b.1.as_str(), + sequence_b.0.gates.iter().map( + |(gate, _params, local_qubits)| { + let qubits = local_qubits + .iter() + .map(|qubit| block_qubit_map[*qubit as usize]) + .collect(); + (*gate, qubits) + }, + ), + ); + decomposer_scores[*index_b] = Some(score); + score + } + }; + score_a.partial_cmp(&score_b).unwrap_or(Ordering::Equal) + }; + + let sequence = decomposers + .iter() + .map(|decomposer| { + ( + decomposer + .call_inner(matrix.view(), None, true, None) + .unwrap(), + decomposer.gate_name().to_string(), + ) + }) + .enumerate() + .min_by(order_sequence) + .unwrap() + .1; + let mut original_err: f64 = 1.; + let mut outside_target = false; + for node_index in node_indices { + let NodeType::Operation(ref inst) = dag.dag()[*node_index] else { + unreachable!("All run nodes will be ops") + }; + let qubits = dag + .get_qargs(inst.qubits) + .iter() + .map(|qubit| PhysicalQubit(qubit.0)) + .collect::>(); + let name = inst.op.name(); + let gate_err = match target.get_error(name, qubits.as_slice()) { + Some(err) => 1. - err, + None => { + // If error rate is None this can mean either the gate is not supported + // in the target or the gate is ideal. We need to do a second lookup + // to determine if the gate is supported, and if it isn't we don't need + // to finish scoring because we know we'll use the synthesis output + let physical_qargs = + qubits.iter().map(|bit| PhysicalQubit(bit.0)).collect(); + if !target.instruction_supported(name, Some(&physical_qargs)) { + outside_target = true; + break; + } + 1. + } + }; + original_err *= gate_err; + } + let original_score = 1. - original_err; + let new_score: f64 = if !outside_target { + score_sequence( + target, + sequence.1.as_str(), + sequence + .0 + .gates + .iter() + .map(|(gate, _params, local_qubits)| { + let qubits = local_qubits + .iter() + .map(|qubit| block_qubit_map[*qubit as usize]) + .collect(); + (*gate, qubits) + }), + ) + } else { + 1. + }; + + if outside_target + || new_score > original_score + || (new_score == original_score + && sequence + .0 + .gates + .iter() + .filter(|(_, __, qubits)| qubits.len() == 2) + .count() + >= node_indices + .iter() + .filter(|node_index| { + let NodeType::Operation(ref inst) = dag.dag()[**node_index] else { + unreachable!("All run nodes will be ops") + }; + let qubits = dag.get_qargs(inst.qubits); + qubits.len() == 2 + }) + .count()) + { + return Ok(None); + } + // This is done at the end of the map in some attempt to minimize + // lock contention. If this were serial code it'd make more sense + // to do this as part of the iteration building the + let mut node_mapping = locked_node_mapping.lock().unwrap(); + for node in node_indices { + node_mapping.insert(*node, run_index); + } + Ok(Some((sequence, block_qubit_map))) + }) + .collect(); + + let run_mapping = run_mapping?; + // After we've computed all the sequences to execute now serially build up a new dag. + let mut processed_runs: HashSet = HashSet::with_capacity(run_mapping.len()); + let mut out_dag = dag.copy_empty_like(py, "alike")?; + let node_mapping = locked_node_mapping.into_inner().unwrap(); + for node in dag.topological_op_nodes()? { + match node_mapping.get(&node) { + Some(run_index) => { + if processed_runs.contains(run_index) { + continue; + } + if run_mapping[*run_index].is_none() { + let NodeType::Operation(ref instr) = dag.dag()[node] else { + unreachable!("Must be an op node") + }; + out_dag.push_back(py, instr.clone())?; + continue; + } + let (sequence, qubit_map) = &run_mapping[*run_index].as_ref().unwrap(); + for (gate, params, local_qubits) in &sequence.0.gates { + let qubits: Vec = local_qubits + .iter() + .map(|index| qubit_map[*index as usize]) + .collect(); + let out_params = if params.is_empty() { + None + } else { + Some(params.iter().map(|val| Param::Float(*val)).collect()) + }; + match gate { + Some(gate) => { + #[cfg(feature = "cache_pygates")] + { + out_dag.apply_operation_back( + py, + PackedOperation::from_standard(*gate), + qubits.as_slice(), + &[], + out_params, + ExtraInstructionAttributes::default(), + None, + ) + } + #[cfg(not(feature = "cache_pygates"))] + { + out_dag.apply_operation_back( + py, + PackedOperation::from_standard(*gate), + qubits.as_slice(), + &[], + out_params, + ExtraInstructionAttributes::default(), + ) + } + } + None => { + let gate = target.operation_from_name(sequence.1.as_str()).unwrap(); + #[cfg(feature = "cache_pygates")] + { + out_dag.apply_operation_back( + py, + gate.operation.clone(), + qubits.as_slice(), + &[], + out_params, + ExtraInstructionAttributes::default(), + None, + ) + } + #[cfg(not(feature = "cache_pygates"))] + { + out_dag.apply_operation_back( + py, + gate.operation.clone(), + qubits.as_slice(), + &[], + out_params, + ExtraInstructionAttributes::default(), + ) + } + } + }?; + } + out_dag.add_global_phase(py, &Param::Float(sequence.0.global_phase))?; + processed_runs.insert(*run_index); + } + None => { + let NodeType::Operation(ref instr) = dag.dag()[node] else { + unreachable!("Must be an op node") + }; + out_dag.push_back(py, instr.clone())?; + } + } + } + Ok(out_dag) +} + +pub fn two_qubit_peephole_mod(m: &Bound) -> PyResult<()> { + m.add_wrapped(wrap_pyfunction!(two_qubit_unitary_peephole_optimize))?; + Ok(()) +} diff --git a/crates/accelerate/src/unitary_synthesis.rs b/crates/accelerate/src/unitary_synthesis.rs index 62f41c78084..338e9481f05 100644 --- a/crates/accelerate/src/unitary_synthesis.rs +++ b/crates/accelerate/src/unitary_synthesis.rs @@ -666,6 +666,7 @@ fn get_2q_decomposers_from_target( if let Some(approx_degree) = approximation_degree { basis_2q_fidelity *= approx_degree; } + let basis_1q = EulerBasis::__new__(basis_1q)?; let decomposer = TwoQubitBasisDecomposer::new_inner( gate.operation.name().to_string(), gate.operation.matrix(&gate.params).unwrap().view(), @@ -761,7 +762,7 @@ fn get_2q_decomposers_from_target( pi_2_basis.to_string(), StandardGate::CXGate.matrix(&[]).unwrap().view(), fidelity, - basis_1q, + EulerBasis::__new__(basis_1q)?, Some(true), )?) } else { diff --git a/crates/pyext/src/lib.rs b/crates/pyext/src/lib.rs index d8e59e04e51..890cf486df8 100644 --- a/crates/pyext/src/lib.rs +++ b/crates/pyext/src/lib.rs @@ -28,6 +28,11 @@ where #[rustfmt::skip] #[pymodule] fn _accelerate(m: &Bound) -> PyResult<()> { + add_submodule( + m, + ::qiskit_accelerate::two_qubit_peephole::two_qubit_peephole_mod, + "two_qubit_peephole", + )?; add_submodule(m, ::qiskit_accelerate::barrier_before_final_measurement::barrier_before_final_measurements_mod, "barrier_before_final_measurement")?; add_submodule(m, ::qiskit_accelerate::basis::basis, "basis")?; add_submodule(m, ::qiskit_accelerate::check_map::check_map_mod, "check_map")?; diff --git a/qiskit/__init__.py b/qiskit/__init__.py index af543bea80c..9105e39fed9 100644 --- a/qiskit/__init__.py +++ b/qiskit/__init__.py @@ -107,6 +107,7 @@ sys.modules["qiskit._accelerate.inverse_cancellation"] = _accelerate.inverse_cancellation sys.modules["qiskit._accelerate.check_map"] = _accelerate.check_map sys.modules["qiskit._accelerate.filter_op_nodes"] = _accelerate.filter_op_nodes +sys.modules["qiskit._accelerate.two_qubit_peephole"] = _accelerate.two_qubit_peephole sys.modules["qiskit._accelerate.twirling"] = _accelerate.twirling sys.modules["qiskit._accelerate.high_level_synthesis"] = _accelerate.high_level_synthesis sys.modules["qiskit._accelerate.remove_identity_equiv"] = _accelerate.remove_identity_equiv diff --git a/qiskit/transpiler/passes/optimization/two_qubit_peephole.py b/qiskit/transpiler/passes/optimization/two_qubit_peephole.py new file mode 100644 index 00000000000..60536d09aa7 --- /dev/null +++ b/qiskit/transpiler/passes/optimization/two_qubit_peephole.py @@ -0,0 +1,59 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2017, 2024. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. + +"""Splits each two-qubit gate in the `dag` into two single-qubit gates, if possible without error.""" + +from __future__ import annotations + +from qiskit.transpiler.basepasses import TransformationPass +from qiskit.transpiler.passmanager import PassManager +from qiskit.transpiler.passes.optimization import Collect2qBlocks, ConsolidateBlocks +from qiskit.transpiler.passes.synthesis import UnitarySynthesis +from qiskit.transpiler.target import Target +from qiskit.dagcircuit.dagcircuit import DAGCircuit +from qiskit._accelerate.two_qubit_peephole import two_qubit_unitary_peephole_optimize + + +class TwoQubitPeepholeOptimization(TransformationPass): + """Unified two qubit unitary peephole optimization""" + + def __init__( + self, + target: Target, + approximation_degree: float | None = 1.0, + method: str = "default", + plugin_config: dict = None, + ): + super().__init__() + self._target = target + self._approximation_degree = approximation_degree + self._pm = None + if method != "default": + self._pm = PassManager( + [ + Collect2qBlocks(), + ConsolidateBlocks( + target=self._target, approximation_degree=self._approximation_degree + ), + UnitarySynthesis( + target=target, + approximation_degree=approximation_degree, + method=method, + plugin_config=plugin_config, + ), + ] + ) + + def run(self, dag: DAGCircuit) -> DAGCircuit: + if self._pm is not None: + return self._pm.run(dag) + return two_qubit_unitary_peephole_optimize(dag, self._target, self._approximation_degree)