Skip to content

Commit

Permalink
added dry-run option to convert python inputs for later large-scale …
Browse files Browse the repository at this point in the history
…mpi-run, additional POVM output at end instead of after every stage, on-node compression of output POVMs before writing to parallel file system, additional option to disable benchmark of underlying operations
  • Loading branch information
robertschade committed Apr 8, 2024
1 parent 3cbaf9c commit b44d06b
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 37 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
CC = g++ # C++ compiler
FCC = gfortran #fortran compiler
PFCC = mpif90 #fortran compile with MPI
CFLAGS = -O3 -march=native -fopenmp -g -flto -fno-strict-aliasing
CFLAGS = -O3 -march=native -fopenmp -g -fargument-noalias -ffreestanding -finline-functions -funroll-all-loops -fprefetch-loop-arrays
CFFLAGS = -ffree-line-length-none -Werror=aliasing -Werror=ampersand -Werror=c-binding-type -Werror=intrinsic-shadow -Werror=intrinsics-std -Werror=line-truncation -Werror=tabs -Werror=target-lifetime -Werror=underflow -Werror=unused-but-set-variable -Werror=unused-variable -Werror=unused-parameter -Werror=unused-label -Werror=conversion -Werror=zerotrip -Wno-maybe-uninitialized -Wuninitialized -Wuse-without-only -fno-strict-aliasing

#don't change the lines below
Expand Down
22 changes: 11 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,15 @@ The most convenient way of using the program is with the included Python wrapper
This wrapper has the following options:

```
usage: pqdts.py [-h] -P PMATRIX [-F FMATRIX] [-D DMAX] [-t THREADS] -p
PQDTSPATH [-o OUTPUT] [-e EPSILON] [-g GAMMA] [-m MAXITER]
[-b] [-v]
usage: pqdts.py [-h] -P PMATRIX [-F FMATRIX] [-D DMAX] [-t THREADS] -p PQDTSPATH [-o OUTPUT] [-e EPSILON] [-g GAMMA] [-m MAXITER] [-T] [-b] [-d] [-v]
optional arguments:
options:
-h, --help show this help message and exit
-P PMATRIX, --Pmatrix PMATRIX
path to npz file (scipy sparse) or npy file (numpy) of
P matrix (dimension D x N)
path to npz file (scipy sparse) or npy file (numpy) of P matrix (dimension D x N)
-F FMATRIX, --Fmatrix FMATRIX
path to npz file (scipy sparse) or npy file (numpy) of
F matrix (dimension D x M)
-D DMAX, --Dmax DMAX truncate to D so that P is a D x M matrix
path to npz file (scipy sparse) or npy file (numpy) of F matrix (dimension D x M)
-D DMAX, --Dmax DMAX truncate to D so that P is a D x N matrix
-t THREADS, --threads THREADS
number of OpenMP threads to use
-p PQDTSPATH, --pqdtspath PQDTSPATH
Expand All @@ -62,9 +58,12 @@ optional arguments:
regularization parameter
-m MAXITER, --maxiter MAXITER
maximal number of iterations
-b, --benchmark benchmark mode, don't write output POVMs
-T, --timing measure timing for reconstruction, don't write output POVMs
-b, --benchmarkops measure timing for underlying operations
-d, --dryrun dry-run: only prepare inputs for pqdts
-v, --verbose be more verbose
```

The dependencies of the wrapper can be installed with `pip3 install -r requirements.txt`.

### Command Line Arguments
Expand All @@ -76,12 +75,13 @@ Without the Python wrapper, the command line arguments of `pqdts_omp.x` and `pqd
5. number of non-zero elements in $P$
6. computation mode: 2 for two-metric projected truncated Newton method, 1 for projected gradient method
7. maxiter: maximal number of iterations in stages
8. output: 0 to disable output of the POVMs, 1 to enable output of POVMs after every minimization stage
8. output: 0 to disable output of the POVMs, 1 to enable output of POVMs at the end, 2 to enable output of POVMs after every minimization stage
9. gamma: regularization parameter $\gamma$
10. epsilon: value of the convergence criterion
11. index of stage to start with, i.e., 1 or 2
12. 0 to start with the initial guess $\varPi=1/N$, 1 to read the output from a previous run as a starting point
13. smoothing distance factor $N_s$
14. benchmark underlying operations: 0 for no, 1 for yes

### Inputs
Without the Python wrapper, the programs `pqdts_omp.x` and `pqdts_omp_mpi.x` expect inputs ($P$ matrix and optionally the $F$ matrix) in the `data` directory in the current working directory. The following files are expected:
Expand Down
4 changes: 2 additions & 2 deletions condat_simplexproj.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
extern "C"{
void simplexproj_condat_(double* y, double* x, int* plength) {
int length=plength[0];
double* aux = (x==y ? (double*)malloc(length*sizeof(double)) : x);
double* aux = x; //(x==y ? (double*)malloc(length*sizeof(double)) : x);
double* aux0=aux;
int auxlength=1;
int auxlengthold=-1;
Expand Down Expand Up @@ -91,6 +91,6 @@ void simplexproj_condat_(double* y, double* x, int* plength) {
} while (auxlength<=auxlengthold);
for (i=0; i<length; i++)
x[i]=(y[i]>tau ? y[i]-tau : 0.0);
if (x==y) free(aux0);
// if (x==y) free(aux0);
}
}
44 changes: 36 additions & 8 deletions job.sh
Original file line number Diff line number Diff line change
@@ -1,21 +1,49 @@
#!/bin/bash
#SBATCH -t 1-0
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -t 0:10:00
#SBATCH --mem=230G
#SBATCH --exclusive
#SBATCH --cpus-per-task=128
#SBATCH --ntasks-per-node=8
#SBATCH --cpus-per-task=16
#SBATCH -x n2dgx01,n2gpu12[01-32],n2hcn[01-05],n2hacc[01-03],n2cn[1101-1196]

#module reset
#module load perf/OSU-Micro-Benchmarks/5.7.1-gompi-2021b
#srun --ntasks-per-node=1 -n $SLURM_JOB_NUM_NODES osu_mbw_mr > osu.out

module reset
module load toolchain/foss/2023b
module load lang/Python/3.11.5-GCCcore-13.2.0
module load lib/zstd/1.5.5-GCCcore-13.2.0
source ./pqdts_env/bin/activate

make clean
make pqdts
make pqdts_mpi

export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
export OMP_PLACES=cores
export OMP_PROC_BIND=true

D=$1
mkdir simulated_D_8200_Dmax_${D}
cd simulated_D_8200_Dmax_${D}
python3 ../pqdts.py -P ../../det-tomo/simulation/data/simulatedP_D_8200.npz -t $SLURM_CPUS_PER_TASK --pqdtspath ../pqdts_omp.x -m 1000 -g 0.0 -e 0.000000001 -D $D -v -b > simulated_D_8200_Dmax_${D}.out
D=`echo "sqrt(200000000000/8/6/151*$SLURM_JOB_NUM_NODES)" | bc`
#eps=`echo "0.000001/$D" | bc -l`
eps=`echo "0.0000001/$D" | bc -l`
#eps=1
dir="simulated_D_260001_Nodes_${SLURM_JOB_NUM_NODES}_Dmax_${D}_$1"
rm -rf $dir
mkdir $dir
cd $dir

cp ../pqdts_omp_mpi.x .
python ../pqdts.py -P ../simulatedP_D_260001.npz -p ./pqdts_omp_mpi.x -v -b -d -D $D -e $eps > prep.out 2> prep.err

cmd=`tail -n1 prep.out`
wd=`pwd`
srun --ntasks-per-node=1 -n $SLURM_JOB_NUM_NODES cp pqdts_omp_mpi.x /dev/shm
srun --ntasks-per-node=1 -n $SLURM_JOB_NUM_NODES cp -r data /dev/shm

cd /dev/shm
srun $cmd > $wd/run.out 2> $wd/run.err

#compress output
srun --ntasks-per-node=1 -n $SLURM_JOB_NUM_NODES bash $wd/../compress_and_copy.sh $wd

67 changes: 59 additions & 8 deletions pqdts.f90
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,6 @@ subroutine tprint(iter)
write (43, *) iter, timingn(i), timingc(i), timingsum(i)
end do
call flush (43)

end subroutine

subroutine fprob(x, mean, prob)
Expand Down Expand Up @@ -1064,6 +1063,7 @@ subroutine conv_test(X, DLDX, tprint, conv)
call MPI_ALLREDUCE(MPI_IN_PLACE, conv, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD, ierr)
#endif
conv = sqrt(conv/(real(N, kind=real64)*real(M, kind=real64)))
!conv = sqrt(conv/(real(N, kind=real64)))
end subroutine

subroutine read_unformatted_binary_from_python(M, N, filename, A)
Expand Down Expand Up @@ -1202,7 +1202,7 @@ program pqdts
real(kind=real64) :: conv, xmin
logical :: tproj = .true.
logical, parameter :: testder = .false.
logical, parameter :: tbench = .false.
logical :: tbench = .false.
real(kind=real64), parameter :: dxp = 1e-7
real(kind=real64) :: tactivetol = 1e-8
real(kind=real64) :: conv_thres = 1e-6
Expand All @@ -1214,7 +1214,7 @@ program pqdts
integer(kind=int32) :: tactive = 0
logical :: precond = .false.
real(kind=real64) :: Fsize = 0, dlalpha, smo_fact
integer(kind=int32) :: output, state, start_stage, rank2, read_input
integer(kind=int32) :: output, state, start_stage, rank2, read_input, bench
integer(kind=int64) :: nact
integer(kind=int64) :: M2, N2, D2, localrows2
Character(len=255) :: str
Expand Down Expand Up @@ -1266,6 +1266,12 @@ program pqdts
read (arg, *) read_input
CALL getarg(13, arg)
read (arg, *) smo_fact
CALL getarg(14, arg)
read (arg, *) bench

if(bench.eq.1)then
tbench=.true.
endif

#ifdef MPI
call MPI_Init(ierror)
Expand Down Expand Up @@ -1309,7 +1315,6 @@ program pqdts
end if

print *, rank, "localrows", localrows0, localrows1, localrows, rowdist(localrows0), rowdist(localrows1)

if (Fl .ge. 0) then
!FIXME only read F partially
call read_sparse_from_python(Fl, "data/F_row.bin", "data/F_col.bin", "data/F_data.bin", Fi, Fj, Fd)
Expand Down Expand Up @@ -1531,7 +1536,7 @@ program pqdts
else
call random_number(P)
end if

allocate (X(localrows, N))
call pset(M, N, 0.0D0, x)
allocate (dLdX(localrows, N))
Expand Down Expand Up @@ -1594,10 +1599,10 @@ program pqdts
#endif
t1 = omp_get_wtime()
call dL(X, dLdX)
t2 = omp_get_wtime()
#ifdef MPI
call MPI_Barrier(MPI_COMM_WORLD, ierr)
#endif
t2 = omp_get_wtime()
if (rank .eq. 0) print *, "t dL", t2 - t1, dLdX(1, 1)
end do

Expand Down Expand Up @@ -2131,8 +2136,11 @@ program pqdts
end if

end do
call conv_test(X, DLDX, output.ne.0, conv)
if (output .ge. 1) then
call conv_test(X, DLDX, output.ge.3, conv)
if (output .ge. 2) then
#ifdef MPI
call MPI_Barrier(MPI_COMM_WORLD, ierr)
#endif
call tstart("output")
!export
write (outname, '(a,i6,a,i6,a)') "rank_", rank, "_oiter", oiter, ".dat"
Expand All @@ -2151,11 +2159,51 @@ program pqdts
write (42) X
close (42)
call tstop("output")
#ifdef MPI
call MPI_Barrier(MPI_COMM_WORLD, ierr)
#endif
end if
call tprint(oiter)
end do
call tstop("min")

if (output .ge. 1) then
!some deallocation to free memory to write to /dev/shm for compression
deallocate(dldx)
deallocate(sn)
deallocate(xtmp)
deallocate(O)
deallocate(z)
deallocate(ik)
#ifdef MPI
call MPI_Barrier(MPI_COMM_WORLD, ierr)
#endif
call tstart("output")
!export
write (outname, '(a,i6,a,i6,a)') "rank_", rank, "_final.dat"
print *, outname
INQUIRE (FILE=outname, EXIST=file_exists)
if (file_exists) then
open (42, file=trim(outname), status='old', FORM='unformatted')
else
open (42, file=trim(outname), status='new', FORM='unformatted')
end if
write (42) M
write (42) N
write (42) D
write (42) localrows
write (42) rank
write (42) X
close (42)
call tstop("output")
#ifdef MPI
call MPI_Barrier(MPI_COMM_WORLD, ierr)
#endif
end if
call tprint(-1)
#ifdef MPI
call MPI_Barrier(MPI_COMM_WORLD, ierr)
#endif

!get memory usage
open (43, file="/proc/self/status", status='old', iostat=state)
Expand All @@ -2173,6 +2221,9 @@ program pqdts
end do
close (43)
close (44)
#ifdef MPI
call MPI_Barrier(MPI_COMM_WORLD, ierr)
#endif

#ifdef MPI
call MPI_Finalize(ierror)
Expand Down
26 changes: 19 additions & 7 deletions pqdts.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def npz_to_fortran(npz,matname):
coodata.tofile('data/'+matname+'_data.bin')
return len(coo.data)

def run_pqdts(N,M,D,P,threads,pqdtspath,maxiter=100,tol=1e-6,gamma=0,smo_int=0,smo_dist=0,start_stage=1,read_input=0,smo_fact=0,output=1,verbose=False,F=None):
def run_pqdts(N,M,D,P,threads,pqdtspath,maxiter=100,tol=1e-6,gamma=0,smo_int=0,smo_dist=0,start_stage=1,read_input=0,smo_fact=0,output=1,verbose=False,F=None,dry=False,benchops=0):
#set environment variables for OpenMP
os.environ["OMP_NUM_THREADS"] = str(threads)
os.environ["OMP_PROC_BIND"] = "True"
Expand All @@ -38,10 +38,14 @@ def run_pqdts(N,M,D,P,threads,pqdtspath,maxiter=100,tol=1e-6,gamma=0,smo_int=0,s
if not (F is None):
NF=npz_to_fortran(F,"F")
#call pqdts
cmd=pqdtspath+" "+str(M)+" "+str(N)+" "+str(D)+" "+str(NF)+" "+str(NP)+" 2 "+str(maxiter)+" "+str(output)+" "+str(gamma)+" "+str(tol)+" "+str(start_stage)+" "+str(read_input)+" "+str(smo_fact)
cmd=pqdtspath+" "+str(M)+" "+str(N)+" "+str(D)+" "+str(NF)+" "+str(NP)+" 2 "+str(maxiter)+" "+str(output)+" "+str(gamma)+" "+str(tol)+" "+str(start_stage)+" "+str(read_input)+" "+str(smo_fact)+" "+str(benchops)
if dry:
print("not executing:")
print(cmd)
return None

if verbose:
print("executing:",cmd)

start = time.time()
out=subprocess.run(cmd, shell=True, capture_output=True)
end = time.time()
Expand Down Expand Up @@ -93,7 +97,9 @@ def run_pqdts(N,M,D,P,threads,pqdtspath,maxiter=100,tol=1e-6,gamma=0,smo_int=0,s
parser.add_argument("-e", "--epsilon", help="convergence parameter of minimization",type=float,default=1e-6)
parser.add_argument("-g", "--gamma", help="regularization parameter",type=float,default=0)
parser.add_argument("-m", "--maxiter", help="maximal number of iterations",type=int,default=200)
parser.add_argument("-b", "--benchmark", help="benchmark mode, don't write output POVMs",action='store_true')
parser.add_argument("-T", "--timing", help="measure timing for reconstruction, don't write output POVMs",action='store_true')
parser.add_argument("-b", "--benchmarkops", help="measure timing for underlying operations",action='store_true')
parser.add_argument("-d", "--dryrun", help="dry-run: only prepare inputs for pqdts",action='store_true')
parser.add_argument("-v", "--verbose", help="be more verbose",action='store_true')
args = parser.parse_args()

Expand Down Expand Up @@ -136,12 +142,18 @@ def run_pqdts(N,M,D,P,threads,pqdtspath,maxiter=100,tol=1e-6,gamma=0,smo_int=0,s

#call OpenMP-version of pqdts
output=1
if args.benchmark:
benchops=0
if args.timing:
output=0
if args.benchmarkops:
benchops=1
if not(args.Fmatrix is None):
povm=run_pqdts(N,M,D,P,threads=args.threads,pqdtspath=args.pqdtspath,maxiter=args.maxiter,tol=args.epsilon,gamma=args.gamma,verbose=args.verbose,output=output,F=F)
povm=run_pqdts(N,M,D,P,threads=args.threads,pqdtspath=args.pqdtspath,maxiter=args.maxiter,tol=args.epsilon,gamma=args.gamma,verbose=args.verbose,output=output,F=F,dry=args.dryrun,benchops=benchops)
else:
povm=run_pqdts(N,M,D,P,threads=args.threads,pqdtspath=args.pqdtspath,maxiter=args.maxiter,tol=args.epsilon,gamma=args.gamma,verbose=args.verbose,output=output)
povm=run_pqdts(N,M,D,P,threads=args.threads,pqdtspath=args.pqdtspath,maxiter=args.maxiter,tol=args.epsilon,gamma=args.gamma,verbose=args.verbose,output=output,dry=args.dryrun,benchops=benchops)

if povm is None:
quit()

#check constraints
x1=0
Expand Down

0 comments on commit b44d06b

Please sign in to comment.