Add solve_hci function (#34)

* add solve_hci general solver function * return addresses * update type annotations * remove unnecessary cast to np.ndarray * update docstring * add docstring for ci_strs
Qiskit · Oct 9, 2024 · ed5480a · ed5480a
1 parent ad4ef62
commit ed5480a
Show file tree

Hide file tree

Showing 2 changed files with 166 additions and 57 deletions.
diff --git a/qiskit_addon_dice_solver/__init__.py b/qiskit_addon_dice_solver/__init__.py
@@ -25,9 +25,10 @@
    solve_dice
 """
 
-from .dice_solver import solve_fermion, solve_dice
+from .dice_solver import solve_dice, solve_fermion, solve_hci
 
 __all__ = [
-    "solve_fermion",
     "solve_dice",
+    "solve_fermion",
+    "solve_hci",
 ]
diff --git a/qiskit_addon_dice_solver/dice_solver.py b/qiskit_addon_dice_solver/dice_solver.py
@@ -14,6 +14,7 @@
 
 from __future__ import annotations
 
+import math
 import os
 import shutil
 import struct
@@ -47,20 +48,25 @@ def __init__(self, command, returncode, log_path):
         super().__init__(message)
 
 
-def solve_fermion(
-    bitstring_matrix: np.ndarray,
-    /,
+def solve_hci(
     hcore: np.ndarray,
     eri: np.ndarray,
     *,
+    norb: int,
+    nelec: tuple[int, int],
+    ci_strs: tuple[Sequence[int], Sequence[int]] | None = None,
+    spin_sq: float = 0.0,
+    select_cutoff: float = 5e-4,
+    energy_tol: float = 1e-10,
+    max_iter: int = 10,
     mpirun_options: Sequence[str] | str | None = None,
     temp_dir: str | Path | None = None,
     clean_temp_dir: bool = True,
-) -> tuple[float, np.ndarray, tuple[np.ndarray, np.ndarray]]:
+) -> tuple[
+    float, np.ndarray, tuple[np.ndarray, np.ndarray], tuple[np.ndarray, np.ndarray]
+]:
     """
-    Approximate the ground state of a molecular Hamiltonian given a bitstring matrix defining the Hilbert subspace.
-
-    This solver is designed for compatibility with `qiskit-addon-sqd <https://qiskit.github.io/qiskit-addon-sqd/>`_ workflows.
+    Approximate the ground state of a molecular Hamiltonian using the heat bath configuration interaction method.
 
     In order to leverage the multi-processing nature of this tool, the user must specify
     the CPU resources to use via the `mpirun_options` argument.
@@ -74,7 +80,7 @@ def solve_fermion(
        # OR
        mpirun_opts = ["-quiet", "-n", "8"]
 
-       energy, sci_coeffs, avg_occs = solve_fermion(..., mpirun_options=mpirun_opts)
+       energy, sci_coeffs, sci_strings, avg_occs = solve_hci(..., mpirun_options=mpirun_opts)
 
     For more information on the ``mpirun`` command line options, refer to the `man page <https://www.open-mpi.org/doc/current/man1/mpirun.1.php>`_.
 
@@ -89,14 +95,19 @@ def solve_fermion(
        of ``40`` or fewer orbitals are supported.
 
     Args:
-        bitstring_matrix: A set of configurations defining the subspace onto which the Hamiltonian
-            will be projected and diagonalized. This is a 2D array of ``bool`` representations of bit
-            values such that each row represents a single bitstring. The spin-up configurations
-            should be specified by column indices in range ``(N, N/2]``, and the spin-down
-            configurations should be specified by column indices in range ``(N/2, 0]``, where ``N``
-            is the number of qubits.
-        hcore: Core Hamiltonian matrix representing single-electron integrals
-        eri: Electronic repulsion integrals representing two-electron integrals
+        hcore: Core Hamiltonian matrix representing single-electron integrals.
+        eri: Electronic repulsion integrals representing two-electron integrals.
+        norb: The number of spatial orbitals.
+        nelec: The numbers of spin up and spin down electrons.
+        ci_strs: CI strings specifying the subspace to use at the beginning of the first HCI iteration.
+            Should be specified as a pair of lists, with the first list containing the alpha strings and the
+            second list containing the beta strings. If not specified, only the Hartree-Fock string will be used.
+            A CI string is specified as an integer whose binary expansion encodes the string. For example,
+            the Hartree-Fock string with 3 electrons in 5 orbitals is `0b00111`.
+        spin_sq: Target value for the total spin squared for the ground state. If ``None``, no spin will be imposed.
+        select_cutoff: Cutoff threshold for retaining state vector coefficients.
+        energy_tol: Energy floating point tolerance.
+        max_iter: The maximum number of HCI iterations to perform.
         mpirun_options: Options controlling the CPU resource allocation for the ``Dice`` command line application.
             These command-line options will be passed directly to the ``mpirun`` command line application during
             invocation of ``Dice``. These may be formatted as a ``Sequence`` of strings or a single string. If a ``Sequence``,
@@ -113,45 +124,42 @@ def solve_fermion(
     Returns:
         - Minimum energy from SCI calculation
         - SCI coefficients
+        - SCI strings
         - Average orbital occupancy
     """
-    # Hard-code target S^2 until supported
-    spin_sq = 0.0
+    n_alpha, n_beta = nelec
 
-    # Convert bitstring matrix to integer determinants for spin-up/down
-    ci_strs = bitstring_matrix_to_ci_strs(bitstring_matrix)
-    num_configurations = len(ci_strs[0])
-    num_up = format(ci_strs[0][0], "b").count("1")
-    num_dn = format(ci_strs[1][0], "b").count("1")
+    if ci_strs is None:
+        # If CI strings not specified, use the Hartree-Fock bitstring
+        ci_strs = ([(1 << n_alpha) - 1], [(1 << n_beta) - 1])
 
     # Set up the temp directory
     temp_dir = temp_dir or tempfile.gettempdir()
     dice_dir = Path(tempfile.mkdtemp(prefix="dice_cli_files_", dir=temp_dir))
 
     # Write the integrals out as an FCI dump for Dice command line app
     active_space_path = dice_dir / "fcidump.txt"
-    num_orbitals = hcore.shape[0]
-    tools.fcidump.from_integrals(
-        active_space_path, hcore, eri, num_orbitals, (num_up + num_dn)
-    )
+    tools.fcidump.from_integrals(active_space_path, hcore, eri, norb, nelec)
 
     _write_input_files(
         addresses=ci_strs,
         active_space_path=active_space_path,
-        num_up=num_up,
-        num_dn=num_dn,
-        num_configurations=num_configurations,
+        norb=norb,
+        num_up=n_alpha,
+        num_dn=n_beta,
         dice_dir=dice_dir,
         spin_sq=spin_sq,
-        max_iter=1,
+        select_cutoff=select_cutoff,
+        energy_tol=energy_tol,
+        max_iter=max_iter,
     )
 
     # Navigate to dice dir and call Dice
     _call_dice(dice_dir, mpirun_options)
 
     # Read and convert outputs
-    e_dice, sci_coefficients, avg_occupancies = _read_dice_outputs(
-        dice_dir, num_orbitals
+    e_dice, sci_coefficients, addresses, avg_occupancies = _read_dice_outputs(
+        dice_dir, norb
     )
 
     # Clean up the temp directory of intermediate files, if desired
@@ -161,10 +169,99 @@ def solve_fermion(
     return (
         e_dice,
         sci_coefficients,
-        (avg_occupancies[:num_orbitals], avg_occupancies[num_orbitals:]),
+        addresses,
+        (avg_occupancies[:norb], avg_occupancies[norb:]),
     )
 
 
+def solve_fermion(
+    bitstring_matrix: np.ndarray,
+    /,
+    hcore: np.ndarray,
+    eri: np.ndarray,
+    *,
+    mpirun_options: Sequence[str] | str | None = None,
+    temp_dir: str | Path | None = None,
+    clean_temp_dir: bool = True,
+) -> tuple[float, np.ndarray, tuple[np.ndarray, np.ndarray]]:
+    """
+    Approximate the ground state of a molecular Hamiltonian given a bitstring matrix defining the Hilbert subspace.
+
+    This solver is designed for compatibility with `qiskit-addon-sqd <https://qiskit.github.io/qiskit-addon-sqd/>`_ workflows.
+
+    In order to leverage the multi-processing nature of this tool, the user must specify
+    the CPU resources to use via the `mpirun_options` argument.
+
+    For example, to use 8 CPU slots in parallel in quiet mode:
+
+    .. code-block:: python
+
+       # Run 8 parallel slots in quiet mode
+       mpirun_opts = "-quiet -n 8"
+       # OR
+       mpirun_opts = ["-quiet", "-n", "8"]
+
+       energy, sci_coeffs, avg_occs = solve_fermion(..., mpirun_options=mpirun_opts)
+
+    For more information on the ``mpirun`` command line options, refer to the `man page <https://www.open-mpi.org/doc/current/man1/mpirun.1.php>`_.
+
+    .. note::
+
+       Only closed-shell systems are supported. The particle number for both
+       spin-up and spin-down determinants is expected to be equal.
+
+    .. note::
+
+       Determinants are interpreted by the ``Dice`` command line application as 5-byte unsigned integers; therefore, only systems
+       of ``40`` or fewer orbitals are supported.
+
+    Args:
+        bitstring_matrix: A set of configurations defining the subspace onto which the Hamiltonian
+            will be projected and diagonalized. This is a 2D array of ``bool`` representations of bit
+            values such that each row represents a single bitstring. The spin-up configurations
+            should be specified by column indices in range ``(N, N/2]``, and the spin-down
+            configurations should be specified by column indices in range ``(N/2, 0]``, where ``N``
+            is the number of qubits.
+        hcore: Core Hamiltonian matrix representing single-electron integrals
+        eri: Electronic repulsion integrals representing two-electron integrals
+        mpirun_options: Options controlling the CPU resource allocation for the ``Dice`` command line application.
+            These command-line options will be passed directly to the ``mpirun`` command line application during
+            invocation of ``Dice``. These may be formatted as a ``Sequence`` of strings or a single string. If a ``Sequence``,
+            the elements will be combined into a single, space-delimited string and passed to
+            ``mpirun``. If the input is a single string, it will be passed to ``mpirun`` as-is. If no
+            ``mpirun_options`` are provided by the user, ``Dice`` will run on a single MPI slot. For more
+            information on the ``mpirun`` command line options, refer to the `man page <https://www.open-mpi.org/doc/current/man1/mpirun.1.php>`_.
+        temp_dir: An absolute path to a directory for storing temporary files. If not provided, the
+            system temporary files directory will be used.
+        clean_temp_dir: Whether to delete intermediate files generated by the ``Dice`` command line application.
+            These files will be stored in a directory created inside ``temp_dir``. If ``False``, then
+            this directory will be preserved.
+
+    Returns:
+        - Minimum energy from SCI calculation
+        - SCI coefficients
+        - Average orbital occupancy
+    """
+    ci_strs = bitstring_matrix_to_ci_strs(bitstring_matrix)
+    num_up = format(ci_strs[0][0], "b").count("1")
+    num_dn = format(ci_strs[1][0], "b").count("1")
+    e_dice, sci_coefficients, _, avg_occupancies = solve_hci(
+        hcore=hcore,
+        eri=eri,
+        norb=hcore.shape[0],
+        nelec=(num_up, num_dn),
+        ci_strs=ci_strs,
+        spin_sq=0.0,  # Hard-code target S^2 until supported
+        select_cutoff=1e12,
+        energy_tol=1e-10,
+        max_iter=1,
+        mpirun_options=mpirun_options,
+        temp_dir=temp_dir,
+        clean_temp_dir=clean_temp_dir,
+    )
+    return e_dice, sci_coefficients, avg_occupancies
+
+
 def solve_dice(
     addresses: tuple[Sequence[int], Sequence[int]],
     active_space_path: str | Path,
@@ -228,30 +325,31 @@ def solve_dice(
         Minimum energy from SCI calculation, SCI coefficients, and average orbital occupancy for spin-up and spin-down orbitals
     """
     # Write Dice inputs to working dir
-    num_configurations = len(addresses[0])
     num_up = bin(addresses[0][0])[2:].count("1")
     num_dn = bin(addresses[1][0])[2:].count("1")
 
     intermediate_dir = Path(tempfile.mkdtemp(prefix="dice_cli_files_", dir=working_dir))
 
+    mf_as = tools.fcidump.to_scf(active_space_path)
+    num_orbitals = mf_as.get_hcore().shape[0]
     _write_input_files(
-        addresses,
-        active_space_path,
-        num_up,
-        num_dn,
-        num_configurations,
-        intermediate_dir,
-        spin_sq,
-        max_iter,
+        addresses=addresses,
+        active_space_path=active_space_path,
+        norb=num_orbitals,
+        num_up=num_up,
+        num_dn=num_dn,
+        dice_dir=intermediate_dir,
+        spin_sq=spin_sq,
+        select_cutoff=1e12,
+        energy_tol=1e-10,
+        max_iter=max_iter,
     )
 
     # Navigate to working dir and call Dice
     _call_dice(intermediate_dir, mpirun_options)
 
     # Read outputs and convert outputs
-    mf_as = tools.fcidump.to_scf(active_space_path)
-    num_orbitals = mf_as.get_hcore().shape[0]
-    e_dice, sci_coefficients, avg_occupancies = _read_dice_outputs(
+    e_dice, sci_coefficients, _, avg_occupancies = _read_dice_outputs(
         intermediate_dir, num_orbitals
     )
     e_dice -= mf_as.mol.energy_nuc()
@@ -269,7 +367,7 @@ def solve_dice(
 
 def _read_dice_outputs(
     dice_dir: str | Path, num_orbitals: int
-) -> tuple[float, np.ndarray, np.ndarray]:
+) -> tuple[float, np.ndarray, tuple[np.ndarray, np.ndarray], np.ndarray]:
     """Calculate the estimated ground state energy and average orbitals occupancies from Dice outputs."""
     # Read in the avg orbital occupancies
     spin1_rdm_dice = np.loadtxt(os.path.join(dice_dir, "spin1RDM.0.0.txt"), skiprows=1)
@@ -290,9 +388,11 @@ def _read_dice_outputs(
     # Construct the SCI wavefunction coefficients from Dice output dets.bin
     occs, amps = _read_wave_function_magnitudes(os.path.join(dice_dir, "dets.bin"))
     addresses = _addresses_from_occupancies(occs)
-    sci_coefficients = _construct_ci_vec_from_addresses_amplitudes(amps, addresses)
+    sci_coefficients, addresses_a, addresses_b = (
+        _construct_ci_vec_from_addresses_amplitudes(amps, addresses)
+    )
 
-    return energy_dice, sci_coefficients, avg_occupancies
+    return energy_dice, sci_coefficients, (addresses_a, addresses_b), avg_occupancies
 
 
 def _call_dice(dice_dir: Path, mpirun_options: Sequence[str] | str | None) -> None:
@@ -326,11 +426,13 @@ def _call_dice(dice_dir: Path, mpirun_options: Sequence[str] | str | None) -> No
 def _write_input_files(
     addresses: tuple[Sequence[int], Sequence[int]],
     active_space_path: str | Path,
+    norb: int,
     num_up: int,
     num_dn: int,
-    num_configurations: int,
     dice_dir: str | Path,
     spin_sq: float,
+    select_cutoff: float,
+    energy_tol: float,
     max_iter: int,
 ) -> None:
     """Prepare the Dice inputs in the specified directory."""
@@ -349,17 +451,18 @@ def _write_input_files(
     # The Dice/Riken branch this package is built on modifies the normal behavior
     # of the SHCI application, so we hard-code this value to prevent unintended
     # behavior due to these modifications.
-    schedule = "schedule\n0 1.e+12\nend\n"
+    schedule = f"schedule\n0 {select_cutoff}\nend\n"
     # Floating point tolerance for Davidson solver
     davidson_tol = "davidsonTol 1e-5\n"
     # Energy floating point tolerance
-    de = "dE 1e-10\n"
+    de = f"dE {energy_tol}\n"
     # The maximum number of HCI iterations to perform
     maxiter = f"maxiter {max_iter}\n"
     # We don't want Dice to be noisyu for now so we hard code noio
     noio = "noio\n"
     # The number of determinants to write as output. We always want all of them.
-    write_best_determinants = f"writeBestDeterminants {num_configurations}\n"
+    dim = math.comb(norb, num_up) * math.comb(norb, num_dn)
+    write_best_determinants = f"writeBestDeterminants {dim}\n"
     # Number of perturbation theory parameters. Must be 0.
     n_pt_iter = "nPTiter 0\n"
     # Requested reduced density matrices
@@ -482,11 +585,13 @@ def _addresses_from_occupancies(occupancy_strs: list[str]) -> list[list[int]]:
 
 def _construct_ci_vec_from_addresses_amplitudes(
     amps: list[float], addresses: list[list[int]]
-) -> np.ndarray:
+) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
     """Construct wavefunction amplitudes from determinant addresses and their associated amplitudes."""
     uniques = np.unique(np.array(addresses))
     num_dets = len(uniques)
     ci_vec = np.zeros((num_dets, num_dets))
+    addresses_a = np.zeros(num_dets, dtype=np.int64)
+    addresses_b = np.zeros(num_dets, dtype=np.int64)
 
     addr_map = {uni_addr: i for i, uni_addr in enumerate(uniques)}
 
@@ -498,4 +603,7 @@ def _construct_ci_vec_from_addresses_amplitudes(
         j = addr_map[address_b]
         ci_vec[i, j] = amps[idx]
 
-    return ci_vec
+        addresses_a[i] = uniques[i]
+        addresses_b[j] = uniques[j]
+
+    return ci_vec, addresses_a, addresses_b