From 971c01b08cc4ed68a61f42883b4a3fceecc8d1ae Mon Sep 17 00:00:00 2001 From: jurjen93 Date: Fri, 30 Aug 2024 09:56:52 +0200 Subject: [PATCH] symlink unlink --- subtract/subtract_with_dp3.py | 431 ------------------------------ subtract/subtract_with_wsclean.py | 40 ++- 2 files changed, 39 insertions(+), 432 deletions(-) delete mode 100644 subtract/subtract_with_dp3.py diff --git a/subtract/subtract_with_dp3.py b/subtract/subtract_with_dp3.py deleted file mode 100644 index 76149134..00000000 --- a/subtract/subtract_with_dp3.py +++ /dev/null @@ -1,431 +0,0 @@ -import numpy as np -import sys -import os -import casacore.tables as ct -import tables -import re -import pandas as pd -from subprocess import check_output, STDOUT -from argparse import ArgumentParser - - -def get_largest_divider(inp, max=1000): - """ - Get largest divider - - :param inp: input number - :param max: max divider - - :return: largest divider from inp bound by max - """ - for r in range(max)[::-1]: - if inp % r == 0: - return r - sys.exit("ERROR: code should not arrive here.") - - -def isfloat(num): - """ - Check if value is a float - """ - try: - float(num) - return True - except ValueError: - return False - - -def parse_history(ms, hist_item): - """ - Grep specific history item from MS - - :param ms: measurement set - :param hist_item: history item - - :return: parsed string - """ - hist = os.popen('taql "SELECT * FROM ' + ms + '::HISTORY" | grep ' + hist_item).read().split(' ') - for item in hist: - if hist_item in item and len(hist_item) <= len(item): - return item - print('WARNING:' + hist_item + ' not found') - return None - - -def get_time_preavg_factor(ms: str = None): - """ - Get time pre-averaging factor (given by demixer.timestep) - - :param ms: measurement set - - :return: averaging integer - """ - parse_str = "demixer.timestep=" - parsed_history = parse_history(ms, parse_str) - avg_num = re.findall(r'\d+', parsed_history.replace(parse_str, ''))[0] - if avg_num.isdigit(): - factor = 
int(float(avg_num)) - if factor != 1: - print("WARNING: " + ms + " time has been pre-averaged with factor " + str( - factor) + ". This might cause time smearing effects.") - return factor - elif isfloat(avg_num): - factor = float(avg_num) - print("WARNING: parsed factor in " + ms + " is not a digit but a float") - return factor - else: - print("WARNING: parsed factor in " + ms + " is not a float or digit") - return None - - -class SubtractDP3: - - def __init__(self, mslist: list = None): - self.mslist = mslist - self.cmd = ['DP3', - 'msin.missingdata=True', - 'msin.orderms=False', - 'msout.storagemanager=dysco'] - self.steps = [] - - @staticmethod - def isfulljones(h5: str = None): - """ - Verify if file is fulljones - - :param h5: h5 file - """ - T = tables.open_file(h5) - soltab = list(T.root.sol000._v_groups.keys())[0] - if 'pol' in T.root.sol000._f_get_child(soltab).val.attrs["AXES"].decode('utf8'): - if T.root.sol000._f_get_child(soltab).pol[:].shape[0] == 4: - T.close() - return True - T.close() - return False - - def make_template_modelcolumn(self): - """ - Make template model column with 0 values - """ - - for ms in self.mslist: - - ts = ct.table(ms, readonly=False) - colnames = ts.colnames() - - if "MODEL_DATA" not in colnames: - # get column description from DATA - desc = ts.getcoldesc('DATA') - # create output column - desc['name'] = "MODEL_DATA" - # create template for output column - ts.addcols(desc) - - else: - print("WARNING: MODEL_DATA already exists") - # get number of rows - nrows = ts.nrows() - # make sure every slice has the same size - best_slice = get_largest_divider(nrows, 1000) - for c in range(0, nrows, best_slice): - model = ts.getcol('MODEL_DATA', startrow=c, nrow=best_slice) - ts.putcol('MODEL_DATA', model * 0, startrow=c, nrow=best_slice) - - def predict(self, - sourcedb: list = None, - h5parm: list = None): - """ - Predict with DP3 (see https://dp3.readthedocs.io/en/latest/steps/Predict.html) - - :param sourcedb: sky model - :param 
h5parm: h5 solutions - """ - - for n, source in enumerate(sourcedb): - self.steps.append(f'beam{n}') - self.steps.append(f'predict{n}') - - pnum = source.split('/')[-1].split("_")[0] - lnum = source.split('/')[-1].split("_")[1].split('-')[0] - h5 = [h5 for h5 in h5parm if pnum in h5 and lnum in h5][0] - - H = tables.open_file(h5) - direction = H.root.sol000.source[:]['dir'] % (2 * np.pi) - direction *= 360 / (2 * np.pi) - - self.cmd += [f'predict{n}.type=predict', - f'predict{n}.sourcedb={source}', - f'predict{n}.applycal.steps=[amp,phase]', - f'predict{n}.applycal.amp.correction=amplitude000', - f'predict{n}.applycal.phase.correction=phase000', - f'predict{n}.applycal.parmdb={h5}', - f'predict{n}.operation=add', - f'beam{n}.type=applybeam', - f'beam{n}.direction=[{round(direction[0][0], 5)}deg,{round(direction[0][1], 5)}deg]' - ] - - self.cmd += ['steps=' + str(self.steps).replace(" ", "").replace("\'", ""), - 'msout.datacolumn=MODEL_DATA', - 'msin.datacolumn=MODEL_DATA', - f'msin={",".join(self.mslist)}', - 'msout=.'] - - print('\n'.join(self.cmd)) - - return self - - def subtract_col(self, out_column: str = None): - - """ - Subtract column in Measurement Set - :param out_column: out column name - """ - - for ms in self.mslist: - print('Subtract ' + ms) - ts = ct.table(ms, readonly=False) - colnames = ts.colnames() - - if "MODEL_DATA" not in colnames: - sys.exit( - f"ERROR: MODEL_DATA does not exist in {ms}.\nThis is most likely due to a failed predict step.") - - if out_column not in colnames: - # get column description from DATA - desc = ts.getcoldesc('DATA') - # create output column - desc['name'] = out_column - # create template for output column - ts.addcols(desc) - - else: - print(out_column, ' already exists') - - # get number of rows - nrows = ts.nrows() - # make sure every slice has the same size - best_slice = get_largest_divider(nrows, 1000) - for c in range(0, nrows, best_slice): - if c == 0: - print('SUBTRACT --> DATA - MODEL_DATA') - data = 
ts.getcol('DATA', startrow=c, nrow=best_slice) - model = ts.getcol('MODEL_DATA', startrow=c, nrow=best_slice) - ts.putcol(out_column, data - model, startrow=c, nrow=best_slice) - ts.close() - - return self - - def moreDP3(self, - phaseshift: str = None, - freqavg: str = None, - timeavg: str = None, concat: bool = None, - applybeam: bool = None, - applycal_h5: str = None, - dirname: str = None): - - """ - Run DP3 command - - :param phaseshift: do phase shift to specific center - :param freqavg: frequency averaging - :param timeavg: time averaging - :param concat: concat the measurement sets - :param applybeam: apply beam in phaseshifted phase center (or otherwise center of field) - :param applycal_h5: applycal solution file - :param dirname: direction name - """ - - self.cmd += ['msin.datacolumn=SUBTRACT_DATA'] - - # 1) PHASESHIFT - if phaseshift is not None: - phasecenter = phaseshift.replace('[', '').replace(']', '').split(',') - phasecenter = f'[{phasecenter[0]},{phasecenter[1]}]' - self.steps.append('ps') - self.cmd += ['ps.type=phaseshifter', - 'ps.phasecenter=' + phasecenter] - - # 2) APPLY BEAM - if applybeam: - self.steps.append('beam') - self.cmd += ['beam.type=applybeam', - 'beam.direction=[]', - 'beam.updateweights=True'] - - # 3) APPLYCAL - if applycal_h5 is not None: - # add fulljones solutions apply - if self.isfulljones(applycal_h5): - self.steps.append('ac') - self.cmd += ['ac.type=applycal', - 'ac.parmdb=' + applycal_h5, - 'ac.correction=fulljones', - 'ac.soltab=[amplitude000,phase000]'] - if phaseshift is not None and dirname is not None: - self.cmd += ['ac.direction=' + dirname] - # add non-fulljones solutions apply - else: - ac_count = 0 - T = tables.open_file(applycal_h5) - for corr in T.root.sol000._v_groups.keys(): - self.cmd += [f'ac{ac_count}.type=applycal', - f'ac{ac_count}.parmdb={applycal_h5}', - f'ac{ac_count}.correction={corr}'] - if phaseshift is not None and dirname is not None: - self.cmd += [f'ac{ac_count}.direction=' + dirname] - 
self.steps.append(f'ac{ac_count}') - ac_count += 1 - T.close() - - # 4) AVERAGING - if freqavg is not None or timeavg is not None: - self.steps.append('avg') - self.cmd += ['avg.type=averager'] - if freqavg is not None: - if str(freqavg).isdigit() or not str(freqavg)[-1].isalpha(): - self.cmd += [f'avg.freqstep={int(freqavg)}'] - else: - self.cmd += [f'avg.freqresolution={freqavg}'] - if timeavg is not None: - if str(timeavg).isdigit(): - self.cmd += [f'avg.timestep={int(timeavg)}'] - else: - self.cmd += [f'avg.timeresolution={timeavg}'] - - self.cmd += ['steps=' + str(self.steps).replace(" ", "").replace("\'", "")] - - self.cmd += [f'msin={",".join(self.mslist)}', - f'msout=sub_{self.mslist[0]}'] - - print('\n'.join(self.cmd)) - - return self - - def run(self, type=''): - """ - Run DP3 command - - :param type: type name - """ - - for n, ms in enumerate(self.mslist): - dp3_cmd = open(f"dp3{type}_{n}.cmd", "w") - dp3_cmd.write('\n'.join(self.cmd)) - dp3_cmd.close() - check_output(' '.join(self.cmd), stderr=STDOUT, shell=True) - - self.cmd = ['DP3', - 'msin.missingdata=True', - 'msin.orderms=False', - 'msout.storagemanager=dysco'] - self.steps = [] - - return self - - -def parse_args(): - """ - Command line argument parser - """ - parser = ArgumentParser(description='Subtract region with WSClean') - parser.add_argument('--mslist', nargs='+', help='measurement sets', required=True) - parser.add_argument('--sourcedb', nargs='+', help='source models', required=True) - parser.add_argument('--region', type=str, help='region file', required=True) - parser.add_argument('--output_name', type=str, help='name of output files (default is model image name)') - parser.add_argument('--skip_predict', action='store_true', - help='skip predict and do only subtract') - parser.add_argument('--h5parm_predict', nargs='+', help='h5 solution files corresponding with sourcedb') - parser.add_argument('--phasecenter', type=str, - help='phaseshift to given point (example: --phaseshift 
16h06m07.61855,55d21m35.4166)') - parser.add_argument('--freqavg', type=str, help='frequency averaging') - parser.add_argument('--timeavg', type=str, help='time averaging') - parser.add_argument('--concat', action='store_true', help='concat MS') - parser.add_argument('--applybeam', action='store_true', help='apply beam in phaseshift center or center of field') - parser.add_argument('--applycal', action='store_true', help='applycal after subtraction and phaseshifting') - parser.add_argument('--applycal_h5', type=str, help='applycal solution file') - parser.add_argument('--print_only_commands', action='store_true', help='only print commands for testing purposes') - parser.add_argument('--forwidefield', action='store_true', - help='will search for the polygon_info.csv file to extract information from') - return parser.parse_args() - - -def main(): - args = parse_args() - - Subtract = SubtractDP3(args.mslist) - - # --forwidefield --> will read averaging and phasecenter from polygon_info.csv - if args.forwidefield: - if os.path.isfile('polygon_info.csv'): - polygon_info = pd.read_csv('polygon_info.csv') - elif os.path.isfile('../polygon_info.csv'): - polygon_info = pd.read_csv('../polygon_info.csv') - elif os.path.isfile('../../polygon_info.csv'): - polygon_info = pd.read_csv('../../polygon_info.csv') - elif os.path.isfile('../../../polygon_info.csv'): - polygon_info = pd.read_csv('../../../polygon_info.csv') - else: - sys.exit('ERROR: using --forwidefield option needs polygon_info.csv file to read polygon information from') - - t = ct.table(args.mslist[0] + "::SPECTRAL_WINDOW") - channum = len(t.getcol("CHAN_FREQ")[0]) - t.close() - - polygon = polygon_info.loc[polygon_info.polygon_file == args.region.split('/')[-1]] - try: - phasecenter = polygon['poly_center'].values[0] - except AttributeError: - print('WARNING: no poly center in polygon_info.csv, use dir instead.') - phasecenter = polygon['dir'].values[0] - except KeyError: - print('WARNING: no poly center in 
polygon_info.csv, use dir instead.') - phasecenter = polygon['dir'].values[0] - - # take only averaging factors that are channum%avg==0 - avg = get_largest_divider(channum, int(polygon['avg'].values[0])) - - freqavg = int(avg) - try: - # if there is pre averaging done on the ms, we need to take this into account - timeavg = int(freqavg / get_time_preavg_factor(args.mslist[0])) - except: - timeavg = int(freqavg) - dirname = polygon['dir_name'].values[0] - - else: - phasecenter = args.phasecenter - freqavg = args.freqavg - timeavg = args.timeavg - dirname = None - - if not args.skip_predict: - print('############## PREDICT ##############') - Subtract.make_template_modelcolumn() - Subtract.predict(sourcedb=args.sourcedb, h5parm=args.h5parm_predict) - if not args.print_only_commands: - Subtract.run(type='predict') - Subtract.subtract_col('SUBTRACT_DATA') - - if args.phasecenter is not None or \ - args.freqavg is not None or \ - args.timeavg is not None or \ - args.concat is not None or \ - args.applybeam is not None or \ - args.applycal is not None: - print('############## RUN DP3 ##############') - if args.applycal_h5 is not None: - applycalh5 = args.applycal_h5 - elif args.applycal and not args.applycal_h5: - sys.exit("ERROR: need a solution file for applycal (give with --applycal_h5)") - else: - applycalh5 = None - - Subtract.moreDP3(phaseshift=phasecenter, freqavg=freqavg, timeavg=timeavg, - concat=args.concat, applybeam=args.applybeam, applycal_h5=applycalh5, dirname=dirname) - if not args.print_only_commands: - Subtract.run(type='phaseshift') - - -if __name__ == "__main__": - main() diff --git a/subtract/subtract_with_wsclean.py b/subtract/subtract_with_wsclean.py index f0defe99..30384876 100644 --- a/subtract/subtract_with_wsclean.py +++ b/subtract/subtract_with_wsclean.py @@ -12,6 +12,7 @@ import pandas as pd from argparse import ArgumentParser import random +import shutil def add_trailing_zeros(s, digitsize=4): @@ -26,6 +27,37 @@ def add_trailing_zeros(s, 
def unlink(symlink_path):
    """
    Replace a symbolic link with a real copy of the data it points to.

    The data is first copied to a temporary sibling path and only then swapped in
    for the symlink, so a failing copy never destroys the link. Errors are reported
    on stdout and are not raised (best-effort, nothing is returned).

    :param symlink_path: Path to the symbolic link to be replaced
    """
    try:
        # Nothing to do for regular files/directories or non-existing paths
        if not os.path.islink(symlink_path):
            print(f"'{symlink_path}' is not a symbolic link.")
            return

        # os.readlink returns the raw link text, which for a relative link is relative
        # to the directory of the link and NOT to the current working directory.
        # realpath resolves this correctly (and follows chains of symlinks).
        target_path = os.path.realpath(symlink_path)

        # Dangling link: keep it, otherwise the only reference to the data location is lost
        if not os.path.exists(target_path):
            print(f"Error: target '{target_path}' of symlink '{symlink_path}' does not exist"
                  f" - Could not replace the symlink with data.")
            return

        # Temporary sibling path, such that the final rename stays on the same file system
        staging_path = f"{symlink_path}.unlink_tmp_{os.getpid()}"
        if os.path.lexists(staging_path):
            print(f"Error: temporary path '{staging_path}' already exists"
                  f" - Could not replace the symlink with data.")
            return

        is_directory = os.path.isdir(target_path)
        try:
            if is_directory:
                shutil.copytree(target_path, staging_path)
            else:
                shutil.copy2(target_path, staging_path)
        except OSError:
            # Remove the partial copy; the symlink itself has not been touched yet
            if os.path.isdir(staging_path):
                shutil.rmtree(staging_path, ignore_errors=True)
            elif os.path.lexists(staging_path):
                os.remove(staging_path)
            raise

        # Copy is complete: only now remove the symlink and move the data in place
        os.unlink(symlink_path)
        print(f"Symlink '{symlink_path}' removed.")
        os.rename(staging_path, symlink_path)

        if is_directory:
            print(f"Directory '{target_path}' copied to '{symlink_path}'.")
        else:
            print(f"File '{target_path}' copied to '{symlink_path}'.")

    except FileNotFoundError:
        print(f"The symlink '{symlink_path}' does not exist.")
    except OSError as e:
        print(f"Error: {e} - Could not replace the symlink with data.")