initial commit

pc2 · Mar 27, 2024 · e33ad60 · e33ad60
commit e33ad60
Show file tree

Hide file tree

Showing 13 changed files with 2,972 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,11 @@
+*.o
+*.x
+data
+*.mod
+*.dat
+*.h5
+pqdts_env
+timing_*
+status_*
+result_*
+povm.p
diff --git a/Makefile b/Makefile
@@ -0,0 +1,32 @@
+#adapt the following to your system
+CC = g++ # C++ compiler
+FCC = gfortran #fortran compiler
+PFCC = mpif90 #fortran compile with MPI
+CFLAGS = -O3 -march=native -fopenmp -g -flto -fno-strict-aliasing
+CFFLAGS = -ffree-line-length-none -Werror=aliasing -Werror=ampersand -Werror=c-binding-type -Werror=intrinsic-shadow -Werror=intrinsics-std -Werror=line-truncation -Werror=tabs -Werror=target-lifetime -Werror=underflow -Werror=unused-but-set-variable -Werror=unused-variable -Werror=unused-parameter -Werror=unused-label -Werror=conversion -Werror=zerotrip -Wno-maybe-uninitialized -Wuninitialized -Wuse-without-only -fno-strict-aliasing
+
+#don't change the lines below
+NAME = pqdts_omp.x
+PNAME = pqdts_omp_mpi.x
+OBJECTS = condat_simplexproj.o pqdts.o pqdts_mpi.o
+
+pqdts: condat_simplexproj.o pqdts.o
+	$(FCC) -o $(NAME) condat_simplexproj.o pqdts.o $(CFLAGS)
+
+pqdts_mpi: condat_simplexproj.o pqdts_mpi.o
+	$(PFCC) -o $(PNAME) -DMPI condat_simplexproj.o pqdts_mpi.o $(CFLAGS)
+
+condat_simplexproj.o: condat_simplexproj.c
+	$(CC) -c $(CFLAGS) $<
+
+pqdts.o: pqdts.f90
+	$(FCC) -c $(CFLAGS) $(CFFLAGS) -cpp pqdts.f90 -o pqdts.o
+
+pqdts_mpi.o: pqdts.f90
+	$(PFCC) -c $(CFLAGS) -DMPI $(CFFLAGS) -cpp pqdts.f90 -o pqdts_mpi.o
+
+pretty:
+	fprettify -i 2 pqdts.f90
+
+clean:
+	rm -f condat_simplexproj.o pqdts.o pqdts_mpi.o basics.mod $(NAME) $(PNAME)
diff --git a/README.md b/README.md
@@ -0,0 +1,149 @@
+# Parallel Quantum Detector Tomography Solver (pqdts)
+## Problem Statement
+This program solves the detector tomography problem for a phase-insensitive detector
+$$\mathrm{min}\quad \|\|P-F\varPi\|\|^2_{2} + \gamma \sum_{n=0}^{N-1}\sum_{i=0}^{M-1}(\varPi_{i,n}-\varPi_{i+1,n})^2$$
+with 
+
+$$ \varPi \in \mathbb{R}^{M\times N} $$
+
+$$ P \in \mathbb{R}^{D\times N} $$
+
+$$ F \in \mathbb{R}^{D\times M} $$
+
+$$\gamma \ge 0$$
+
+and the conditions
+
+$$\varPi_{i,n}\ge 0 \ \forall i \in \{0,...,M-1\}, \ n\in \{0,...,N-1\}$$
+
+$$\sum_{n=0}^{N-1}\varPi_{i,n}=1 \ \forall i \in \{0,...,M-1\}$$
+
+It uses a two-stage approach based on a two-metric projected truncated Newton method and is parallelized with MPI and OpenMP. Scalability to hundreds of compute nodes and more than 100.000 CPU cores has been successfully demonstrated. Details are documented in the corresponding publication.
+
+## How to Build
+### Requirements
+* a C compiler (e.g. gcc) and a Fortran compiler (e.g. gfortan) supporting OpenMP
+* for the MPI version additionally an MPI implementation, e.g., OpenMPI
+
+### Build Instructions
+* adjust the build options in the `Makefile` according to your system
+* run `make pqdts` for the OpenMP-parallelized version
+* run `make pqdts_mpi` for the MPI-and-OpenMP-parallelized version
+
+## How to Use
+### Python Wrapper
+The most convenient way of using the program is with the included Python wrapper `pqdts.py` and supports input matrices ($P$,$F$) from numpy (dense, npz) and scipy (sparse, npy). The Python wrapper only supports the OpenMP-parallelized version because the running the MPI-parallelized version usually requires an adaption to the HPC system where is should run, e.g., the MPI-wrapper of the system like `mpirun`, `mpiexec`, `srun` or others.
+
+This wrapper has the following options
+```
+usage: pqdts.py [-h] -P PMATRIX [-F FMATRIX] [-D DMAX] [-t THREADS] -p
+                PQDTSPATH [-o OUTPUT] [-e EPSILON] [-g GAMMA] [-m MAXITER]
+                [-b] [-v]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -P PMATRIX, --Pmatrix PMATRIX
+                        path to npz file (scipy sparse) or npy file (numpy) of
+                        P matrix (dimension D x N)
+  -F FMATRIX, --Fmatrix FMATRIX
+                        path to npz file (scipy sparse) or npy file (numpy) of
+                        F matrix (dimension D x M)
+  -D DMAX, --Dmax DMAX  truncate to D so that P is a D x M matrix
+  -t THREADS, --threads THREADS
+                        numper of OpenMP threads to use
+  -p PQDTSPATH, --pqdtspath PQDTSPATH
+                        path to compiled pqdts_omp.x
+  -o OUTPUT, --output OUTPUT
+                        output file for povm as pickle
+  -e EPSILON, --epsilon EPSILON
+                        convergence parameter of minimization
+  -g GAMMA, --gamma GAMMA
+                        regularization parameter
+  -m MAXITER, --maxiter MAXITER
+                        maximal number of iterations
+  -b, --benchmark       benchmark mode, don't write output POVMs
+  -v, --verbose         be more verbose
+```
+The dependencies of the wrapper can be installed with `pip install -r requirements.txt`.
+
+### Command Line Arguments
+Without the Python wrapper, the command line arguments of `pqdts_omp.x` and `pqdts_omp_mpi.x` are:
+1. size of first dimension of $\varPi$
+2. size of second dimension of $\varPi$
+3. size of first dimension of $P$
+4. number of non-zero elements in $F$ or -1 to calculate $F$ internally
+5. number of non-zero elements in $P$
+6. computation mode: 2 for two-metric projected truncated Newton method, 1 for projected gradient method 
+7. maxiter: maximal number of iterations in stages
+8. output: 0 for no output of the POVMs, 0 to disable output of POVMs, 1 to enable output of POVMs after every minimization stage
+9. gamma: regularization parameter $\gamma$
+10. epsilon: value of the convergence criterion
+11. index of stage to start with, i.e., 1 or 2
+12. 0 to start with the initial guess $\varPi=1/N$, 1 to read the output from a previous run as a starting point
+13. smoothing distance factor $N_s$
+
+### Inputs
+Without the Python wrapper, the programs `pqdts_omp.x` and `pqdts_omp_mpi.x` expect inputs ($P$ matrix and optionally the $F$ matrix) in the `data` directory in the current working directory. The following files are expected:
+* `data/P_row.bin` row indices of elements in $P$ in Fortran binary format
+* `data/P_col.bin` column indices of elements in $P$ in Fortran binary format
+* `data/P_data.bin` values of elements in $P$ in Fortran binary format
+For $F$ in addition:
+* `data/F_row.bin` row indices of elements in $F$ in Fortran binary format
+* `data/F_col.bin` column indices of elements in $F$ in Fortran binary format
+* `data/F_data.bin` values of elements in $F$ in Fortran binary format
+For details have a look at the routine `read_sparse_from_python` in `pqdts.f90` andthe Python wrapper `pqdts.py`.
+
+For reading in existing POVMs as a starting point for the minimization:
+* files of the form `"rank_"+rank+"_oiter"+oiter+".dat"` that were written by pqdts.
+
+## Outputs
+* std-out: progress information
+* `"rank_"+rank+"_oiter"+oiter+".dat"`: files containing the POVMs in Fortran file format of rank `rank` after stage `oiter`
+* `"timing_"+rank+".out"` file containing timing information of the seperate stages and timed routines
+* `"status_"+rank+".out"` content of `/proc/self/status` at the end of the program execution to retreive memory usage from high-water mark
+
+# Wigner Functions for High Photon Numbers
+The computation of the Wigner function for very high photon numbers is not possible with conventional implementations because it requires the representability of very large floating-point numbers that exceed the range of 64-bit floating-pint numbers. To be able to use arbitrary-precision floating-point numbers we make use of the multiple dispatch feature of the Julia programming language. With this, the Wigner-function implmentation of QuantumOptics.jl (https://qojulia.org/, functions `wigner` and `_clenshaw` from https://github.com/qojulia/QuantumOptics.jl/blob/v1.0.15/src/phasespace.jl) can be generalized to arbitrary-precision floating-point numbers. Julia uses the GNU MPFR library for the BigFloat data type.
+
+We have optimized the implementation for phase-insensitive detectors, i.e., diagonal density matrices and have trivially parallelized the computation because computations with arbitrary-precision floating-point numbers are much more cumbersome than with double-precision floating-point numbers.
+
+## How to Build
+A Julia installation is required to run the Wigner-programs. See https://julialang.org/downloads/ for details. The Julia packages that the program depends on can be installed with `julia wigner_init.jl`.
+
+## How to Use
+### Variants
+There are three variants:
+* `wigner_fp64.jl`: using conventional 64-bit floating-point numbers and can be used to explore the need the larger arbitrary-precision floating-point numbers
+* `wigner_mpfr.jl`: using arbitrary-precision floating-point numbers
+* `wigner_mpfr_mpi.jl`: using arbitrary-precision floating-point numbers and a trivial MPI-parallelization to scale to be able to many CPU-cores/compute nodes. This variant needs to be run as an MPI program, e.g, `mpirun julia wigner_mpfr_mpi.jl ...`. 
+
+### Command Line Arguments
+The three variants take the following command line arguments
+1. input hdf5-file
+2. n: name of dataset in hdf5 file 
+3. number of points to evualate the wigner function on in positive x-direction
+4. maximal x-argument for Wigner function
+5. precision: for `wigner_mpfr.jl` and `wigner_mpfr_mpi.jl` the bit size of the mantissa
+
+### Input HDF5-File
+The input HDF5 file is expected to have a one-dimensional data set named `n` containing the POVM. It can be created with the python script from $\varPi$ 
+```
+import h5py
+#povm (Pi) is a MxN numpy matrix 
+mat=np.asarray(povm)
+hf = h5py.File(filename, 'w')
+for i in range(N):
+    hf.create_dataset(str(i), data=mat[:,i])
+hf.close()
+```
+
+### Output Files 
+The scripts plot the Wigner functions and store the plot as `"n_"+(n)*"_M_"+(M)+"_precision_"+(precision)+".png"`. For plotting with Python the Wigner function is written as an HDF5-file `n_"+(n)+"_M_"+(M)+"_precision_"+(precision)+".h5", "w")` with group name `wigner` and data sets `x` for the points on the x-axis and `w` for the values of the Wigner function on these points.
+
+This HDF5-file can be read in Python with
+```
+import h5py
+hf = h5py.File(filename, 'r')
+x=hf['wigner']["x"][:]
+w=hf['wigner']["w"][:]
+```
diff --git a/condat_simplexproj.c b/condat_simplexproj.c
@@ -0,0 +1,96 @@
+/*
+ #  File            : condat_simplexproj.c 
+ #
+ #  Version History : 1.0, Aug. 15, 2014 
+ #
+ #  Author          : Laurent Condat, PhD, CNRS research fellow in France.
+ #
+ #  Description     : This file contains an implementation in the C language
+ #                    of algorithms described in the research paper:
+ #	
+ #                    L. Condat, "Fast Projection onto the Simplex and the
+ #                    l1 Ball", preprint Hal-01056171, 2014.
+ #
+ #                    This implementation comes with no warranty: due to the
+ #                    limited number of tests performed, there may remain
+ #                    bugs. In case the functions would not do what they are
+ #                    supposed to do, please email the author (contact info
+ #                    to be found on the web).
+ #
+ #                    If you use this code or parts of it for any purpose,
+ #                    the author asks you to cite the paper above or, in 
+ #                    that event, its published version. Please email him if 
+ #                    the proposed algorithms were useful for one of your 
+ #                    projects, or for any comment or suggestion.
+ #
+ #  Usage rights    : Copyright Laurent Condat.
+ #                    This file is distributed under the terms of the CeCILL
+ #                    licence (compatible with the GNU GPL), which can be
+ #                    found at the URL "http://www.cecill.info".
+ #
+ #  This software is governed by the CeCILL license under French law and
+ #  abiding by the rules of distribution of free software. You can  use,
+ #  modify and or redistribute the software under the terms of the CeCILL
+ #  license as circulated by CEA, CNRS and INRIA at the following URL :
+ #  "http://www.cecill.info".
+ #
+ #  As a counterpart to the access to the source code and rights to copy,
+ #  modify and redistribute granted by the license, users are provided only
+ #  with a limited warranty  and the software's author,  the holder of the
+ #  economic rights,  and the successive licensors  have only  limited
+ #  liability.
+ #
+ #  In this respect, the user's attention is drawn to the risks associated
+ #  with loading,  using,  modifying and/or developing or reproducing the
+ #  software by the user in light of its specific status of free software,
+ #  that may mean  that it is complicated to manipulate,  and  that  also
+ #  therefore means  that it is reserved for developers  and  experienced
+ #  professionals having in-depth computer knowledge. Users are therefore
+ #  encouraged to load and test the software's suitability as regards their
+ #  requirements in conditions enabling the security of their systems and/or
+ #  data to be ensured and,  more generally, to use and operate it in the
+ #  same conditions as regards security.
+ #
+ #  The fact that you are presently reading this means that you have had
+ #  knowledge of the CeCILL license and that you accept its terms.
+*/
+
+#include <cstdlib>
+extern "C"{
+void simplexproj_condat_(double* y, double* x, int* plength) {
+        int length=plength[0];
+	double*	aux = (x==y ? (double*)malloc(length*sizeof(double)) : x);
+	double*  aux0=aux;
+	int		auxlength=1; 
+	int		auxlengthold=-1;	
+	double	tau=(*aux=*y)-1.0;
+	int 	i=1;
+	for (; i<length; i++) 
+		if (y[i]>tau) {
+			if ((tau+=((aux[auxlength]=y[i])-tau)/(auxlength-auxlengthold))
+			<=y[i]-1.0) {
+				tau=y[i]-1.0;
+				auxlengthold=auxlength-1;
+			}
+			auxlength++;
+		} 
+	if (auxlengthold>=0) {
+		auxlength-=++auxlengthold;
+		aux+=auxlengthold;
+		while (--auxlengthold>=0) 
+			if (aux0[auxlengthold]>tau) 
+				tau+=((*(--aux)=aux0[auxlengthold])-tau)/(++auxlength);
+	}
+	do {
+		auxlengthold=auxlength-1;
+		for (i=auxlength=0; i<=auxlengthold; i++)
+			if (aux[i]>tau) 
+				aux[auxlength++]=aux[i];	
+			else 
+				tau+=(tau-aux[i])/(auxlengthold-i+auxlength);
+	} while (auxlength<=auxlengthold);
+	for (i=0; i<length; i++)
+		x[i]=(y[i]>tau ? y[i]-tau : 0.0); 
+	if (x==y) free(aux0);
+}
+} 
diff --git a/job.sh b/job.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+#SBATCH -t 1-0
+#SBATCH -N 1
+#SBATCH -n 1
+#SBATCH --exclusive
+#SBATCH --cpus-per-task=128
+
+
+module reset
+module load toolchain/foss/2023b
+module load lang/Python/3.11.5-GCCcore-13.2.0
+source ./pqdts_env/bin/activate
+
+export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
+export OMP_PLACES=cores
+export OMP_PROC_BIND=true
+
+D=$1
+mkdir simulated_D_8200_Dmax_${D}
+cd simulated_D_8200_Dmax_${D}
+python3 ../pqdts.py -P ../../det-tomo/simulation/data/simulatedP_D_8200.npz -t $SLURM_CPUS_PER_TASK --pqdtspath ../pqdts_omp.x -m 1000 -g 0.0 -e 0.000000001 -D $D -v -b > simulated_D_8200_Dmax_${D}.out
diff --git a/overflow.jl b/overflow.jl
@@ -0,0 +1,13 @@
+
+function main()
+  setprecision(128)
+  x=Float64(2.0)
+  x2=BigFloat(2.0)
+  for i in 1:5000
+    x=x^1.01
+    x2=x2^1.01
+    println(i," ",x," ",x2)
+  end
+end
+
+main()