diff --git a/FINDSOURCES.md b/FINDSOURCES.md new file mode 100644 index 00000000..de230eff --- /dev/null +++ b/FINDSOURCES.md @@ -0,0 +1,223 @@ +# find\_sources.py + +This script allows you to quickly query data from the RACS and VAST Pilot surveys on provided coordinates, either through the command line or using a csv list. + +This includes Stokes I and Stokes V (negative sources included). + +The outputs are/can be: +* Postage stamp fits images of each location. +* DS9 region file or kvis annotation file of selavy sources within the postage stamp. +* Figure png plots of postage stamp, with overlaid selavy sources. +* Crossmatch output file containing information on the nearest matched component. + +## Running on your own machine +By default the script is set up for use on the ada machine. Copies of the survey data are required to run elsewhere (will change in the future once RACS becomes publicly available). + +The script assumes that VAST Pilot data are in the same directory structure as that used in the dropbox folder. If you are running `find_sources.py` on your own machine we recommend first using the `--find-fields` flag, downloading the relevant fields to an appropriate base directory and then re-running the script as normal. + +## Warning! +* Currently RACSv2 is being used. This does not include selavy catalogues for latest observations, mainly the southern polar cap. +* Some VAST Pilot fields are undergoing reprocessing or will need to be reobserved in the future. + + +## Requirements +* Python 3 +* pandas +* numpy +* astropy +* matplotlib +* scipy +* colorlog (optional) + +Latest versions of above recommended. + +There is a requirements.txt included in the repository that you can use to install the dependencies using +``` +pip install -r requirements.txt +``` + +* Access to survey images and associated selavy outputs. + +## Usage + +Most options should be self-explanatory. See examples below on how to run the script. 
+ +All output is placed in an output directory of which the name can be set with the option `--out-folder`. + +Can be run in either Stokes I or Stokes V, not both at once. +``` +usage: find_sources.py [-h] [--imsize IMSIZE] [--maxsep MAXSEP] + [--out-folder OUT_FOLDER] [--source-names SOURCE_NAMES] + [--crossmatch-radius CROSSMATCH_RADIUS] [--use-tiles] + [--img-folder IMG_FOLDER] [--rms-folder RMS_FOLDER] + [--cat-folder CAT_FOLDER] [--create-png] + [--png-selavy-overlay] + [--png-linear-percentile PNG_LINEAR_PERCENTILE] + [--png-use-zscale] + [--png-zscale-contrast PNG_ZSCALE_CONTRAST] + [--png-no-island-labels] + [--png-ellipse-pa-corr PNG_ELLIPSE_PA_CORR] + [--png-no-colorbar] [--ann] [--reg] [--stokesv] [--quiet] + [--crossmatch-only] [--selavy-simple] [--process-matches] + [--debug] [--no-background-rms] [--find-fields] + "HH:MM:SS [+/-]DD:MM:SS" OR input.csv + +positional arguments: + "HH:MM:SS [+/-]DD:MM:SS" OR input.csv + Right Ascension and Declination in formnat "HH:MM:SS + [+/-]DD:MM:SS", in quotes. E.g. "12:00:00 -20:00:00". + Degrees is also acceptable, e.g. "12.123 -20.123". + Multiple coordinates are supported by separating with + a comma (no space) e.g. "12.231 -56.56,123.4 +21.3. + Finally you can also enter coordinates using a .csv + file. See example file for format. + +optional arguments: + -h, --help show this help message and exit + --imsize IMSIZE Edge size of the postagestamp in arcmin (default: + 30.0) + --maxsep MAXSEP Maximum separation of source from beam centre in + degrees. (default: 1.0) + --out-folder OUT_FOLDER + Name of the output directory to place all results in. + (default: find_sources_output_20191115_18:33:09) + --source-names SOURCE_NAMES + Only for use when entering coordaintes via the command + line. State the name of the source being searched. Use + quote marks for names that contain a space. For + multiple sources separate with a comma with no space, + e.g. 
"SN 1994N,SN 2003D,SN 2019A" (default: ) + --crossmatch-radius CROSSMATCH_RADIUS + Crossmatch radius in arcseconds (default: 15.0) + --use-tiles Use the individual tiles instead of combined mosaics. + (default: False) + --img-folder IMG_FOLDER + Path to folder where images are stored (default: None) + --rms-folder RMS_FOLDER + Path to folder where image RMS estimates are stored + (default: None) + --cat-folder CAT_FOLDER + Path to folder where selavy catalogues are stored + (default: None) + --create-png Create a png of the fits cutout. (default: False) + --png-selavy-overlay Overlay selavy components onto the png image. + (default: False) + --png-linear-percentile PNG_LINEAR_PERCENTILE + Choose the percentile level for the png normalisation. + (default: 99.9) + --png-use-zscale Select ZScale normalisation (default is 'linear'). + (default: False) + --png-zscale-contrast PNG_ZSCALE_CONTRAST + Select contrast to use for zscale. (default: 0.1) + --png-no-island-labels + Disable island lables on the png. (default: False) + --png-ellipse-pa-corr PNG_ELLIPSE_PA_CORR + Correction to apply to ellipse position angle if + needed (in deg). Angle is from x-axis from left to + right. (default: 0.0) + --png-no-colorbar Do not show the colorbar on the png. (default: False) + --ann Create a kvis annotation file of the components. + (default: False) + --reg Create a DS9 region file of the components. (default: + False) + --stokesv Use Stokes V images and catalogues. Works with + combined images only! (default: False) + --quiet Turn off non-essential terminal output. (default: + False) + --crossmatch-only Only run crossmatch, do not generate any fits or png + files. (default: False) + --selavy-simple Only include flux density and uncertainty from selavy + in returned table. (default: False) + --process-matches Only produce data products for sources that have a + match from selavy. (default: False) + --debug Turn on debug output. 
(default: False) + --no-background-rms Do not estimate the background RMS around each source. + (default: False) + --find-fields Only return the associated field for each source. + (default: False) + --vast-pilot Query the VAST Pilot instead of RACS. Input is the + epoch number of the VAST pilot. (default: None) +``` + +## Inputs + +The script takes one main input, the coordinates, given either directly on the command line or using an input csv file. + +### Command line: Single Coordinate +Here the format can be either in Hours or decimal degrees: +* `"HH:MM:SS.ss +/-DD:MM:SS.ss"` +* `"DDD.ddd +/-DD.ddd"` +Note the space between the coordinates and the quotation marks. + +E.g. +``` +python find_sources.py "22:37:5.6000 +34:24:31.90" +``` +``` +python find_sources.py "339.2733333 34.4088611" +``` + +It's recommended to provide a source name using the option `--source-names`, e.g. +``` +python find_sources.py "22:37:5.6000 +34:24:31.90" --source-names "SN 2014C" +``` + + +### Command line: Multiple Coordinates +Same format as above but now separate coordinates with `,`: +* `"HH:MM:SS.ss +/-DD:MM:SS.ss,HH:MM:SS.ss +/-DD:MM:SS.ss,HH:MM:SS.ss +/-DD:MM:SS.ss"` +* `"DDD.ddd +/-DD.ddd,DDD.ddd +/-DD.ddd,DDD.ddd +/-DD.ddd"` + +Note there are no spaces around the commas. + +E.g. +``` +python find_sources.py "22:37:5.6000 +34:24:31.90,22:37:5.6000 -34:24:31.90,13:37:5.6000 -84:24:31.90" +``` +``` +python find_sources.py "339.2733333 34.4088611,154.2733333 -34.4088611,20.2733333 -54.4088611" +``` + +Source names can still be defined using the option `--source-names` with the same comma notation e.g. + +``` +python find_sources.py "22:37:5.6000 +34:24:31.90,22:37:5.6000 -34:24:31.90,13:37:5.6000 -84:24:31.90" --source-names "SN 2014C,SN 2012C,SN2019B" +``` + +### Input CSV file +To crossmatch many coordinates it's recommended to use a csv. Instead of entering coordinates enter the name of the csv. The `--source-names` option is not used with CSV files. + +E.g. 
+``` +python find_sources.py my_coords.csv +``` + +The columns `ra` and `dec` are required and can be in either of the formats shown in the command line options. `name` is also accepted and is recommended. E.g. +``` +ra,dec,name +123.45,-67.89,source name +``` + +See `input_example.csv`. + +## Examples + +Search for a match to one source and create a FITS postage stamp of 5 arcminutes across. Will place the output in `example_source`. + +``` +find_sources.py "22:37:5.6000 +34:24:31.90" --imsize 5.0 --source-names "SN 2014C" --out-folder example_source +``` + +To include a png output with selavy overlay: + +``` +find_sources.py "22:37:5.6000 +34:24:31.90" --imsize 5.0 --source-names "SN 2014C" --out-folder example_source --create-png --png-selavy-overlay +``` +Now search in Stokes V to a different directory and also include a kvis annotation file and an extra coordinate: +``` +find_sources.py "22:37:5.6000 +34:24:31.90,22:37:5.6000 +44:24:31.90" --imsize 5.0 --source-names "SN 2014C,SN 2019I" --out-folder example_source_stokesv_ --create-png --png-selavy-overlay --stokesv --ann +``` +Search through a csv of coordinates, make pngs, use zscale with a contrast of 0.2, create annotation and region files: +``` +find_sources.py my_coords.csv --imsize 5.0 --out-folder example_source --create-png --png-selavy-overlay --png-use-zscale --png-zscale-contrast 0.2 --ann --reg +``` \ No newline at end of file diff --git a/README.md b/README.md index b429f8e3..900f9502 100644 --- a/README.md +++ b/README.md @@ -4,35 +4,14 @@ A space to share your hacky scripts that others may find useful. Currently these are scripts that can just be copied and run, not a module installation yet (soon). -## find_sources.py - -This script allows you to quickly query data from the RACS and VAST Pilot surveys on provided coordinates, either through the command line or using a csv list. - -This includes Stokes I and Stokes V (negative sources included). 
- -The outputs are/can be: -* Postage stamp fits images of each location. -* DS9 region file or kvis annotation file of selavy sources withing postage stamp. -* Figure png plots of postage stamp, with overlaid selavy sources. -* Crossmatch output file containing information on the nearest matched component. - -### Running on your own machine -By default the script is set up for use on the ada machine. Copies of the survey data is required to run elsewhere (will change in the future once RACS becomes publicly available). - -The script assumes that VAST Pilot data are in the same directory structure as that used in the dropbox folder. If you are running `find_sources.py` on your own machine we recommend first using the `--find-fields` flag, downloading the relevant fields to an appropriate base directory and then re-running the script as normal. - -### Warning! -* Currently RACSv2 is being used. This does not include selavy catalogues for latest observations, mainly the southern polar cap. -* Some VAST Pilot fields are undergoing reprocessing or will need to be reobserved in the future. - - -### Requirements +## Requirements * Python 3 * pandas * numpy * astropy * matplotlib * scipy +* dropbox * colorlog (optional) Latest versions of above recommended. @@ -42,188 +21,10 @@ There is a requirements.txt included in the repository that you can use to insta pip install -r requirements.txt ```` -* Access to survey images and associated selavy outputs. - -### Usage - -Most options should be self explanatory. See examples below on how to run the script. - -All output is placed in an output directory of which the name can be set with the option `--out-folder`. - -Can be run in either Stokes I or Stokes V, not both at once. 
-``` -usage: find_sources.py [-h] [--imsize IMSIZE] [--maxsep MAXSEP] - [--out-folder OUT_FOLDER] [--source-names SOURCE_NAMES] - [--crossmatch-radius CROSSMATCH_RADIUS] [--use-tiles] - [--img-folder IMG_FOLDER] [--rms-folder RMS_FOLDER] - [--cat-folder CAT_FOLDER] [--create-png] - [--png-selavy-overlay] - [--png-linear-percentile PNG_LINEAR_PERCENTILE] - [--png-use-zscale] - [--png-zscale-contrast PNG_ZSCALE_CONTRAST] - [--png-no-island-labels] - [--png-ellipse-pa-corr PNG_ELLIPSE_PA_CORR] - [--png-no-colorbar] [--ann] [--reg] [--stokesv] [--quiet] - [--crossmatch-only] [--selavy-simple] [--process-matches] - [--debug] [--no-background-rms] [--find-fields] - "HH:MM:SS [+/-]DD:MM:SS" OR input.csv - -positional arguments: - "HH:MM:SS [+/-]DD:MM:SS" OR input.csv - Right Ascension and Declination in formnat "HH:MM:SS - [+/-]DD:MM:SS", in quotes. E.g. "12:00:00 -20:00:00". - Degrees is also acceptable, e.g. "12.123 -20.123". - Multiple coordinates are supported by separating with - a comma (no space) e.g. "12.231 -56.56,123.4 +21.3. - Finally you can also enter coordinates using a .csv - file. See example file for format. - -optional arguments: - -h, --help show this help message and exit - --imsize IMSIZE Edge size of the postagestamp in arcmin (default: - 30.0) - --maxsep MAXSEP Maximum separation of source from beam centre in - degrees. (default: 1.0) - --out-folder OUT_FOLDER - Name of the output directory to place all results in. - (default: find_sources_output_20191115_18:33:09) - --source-names SOURCE_NAMES - Only for use when entering coordaintes via the command - line. State the name of the source being searched. Use - quote marks for names that contain a space. For - multiple sources separate with a comma with no space, - e.g. "SN 1994N,SN 2003D,SN 2019A" (default: ) - --crossmatch-radius CROSSMATCH_RADIUS - Crossmatch radius in arcseconds (default: 15.0) - --use-tiles Use the individual tiles instead of combined mosaics. 
- (default: False) - --img-folder IMG_FOLDER - Path to folder where images are stored (default: None) - --rms-folder RMS_FOLDER - Path to folder where image RMS estimates are stored - (default: None) - --cat-folder CAT_FOLDER - Path to folder where selavy catalogues are stored - (default: None) - --create-png Create a png of the fits cutout. (default: False) - --png-selavy-overlay Overlay selavy components onto the png image. - (default: False) - --png-linear-percentile PNG_LINEAR_PERCENTILE - Choose the percentile level for the png normalisation. - (default: 99.9) - --png-use-zscale Select ZScale normalisation (default is 'linear'). - (default: False) - --png-zscale-contrast PNG_ZSCALE_CONTRAST - Select contrast to use for zscale. (default: 0.1) - --png-no-island-labels - Disable island lables on the png. (default: False) - --png-ellipse-pa-corr PNG_ELLIPSE_PA_CORR - Correction to apply to ellipse position angle if - needed (in deg). Angle is from x-axis from left to - right. (default: 0.0) - --png-no-colorbar Do not show the colorbar on the png. (default: False) - --ann Create a kvis annotation file of the components. - (default: False) - --reg Create a DS9 region file of the components. (default: - False) - --stokesv Use Stokes V images and catalogues. Works with - combined images only! (default: False) - --quiet Turn off non-essential terminal output. (default: - False) - --crossmatch-only Only run crossmatch, do not generate any fits or png - files. (default: False) - --selavy-simple Only include flux density and uncertainty from selavy - in returned table. (default: False) - --process-matches Only produce data products for sources that have a - match from selavy. (default: False) - --debug Turn on debug output. (default: False) - --no-background-rms Do not estimate the background RMS around each source. - (default: False) - --find-fields Only return the associated field for each source. - (default: False) - --vast-pilot Query the VAST Pilot instead of RACS. 
Input is the - epoch number of the VAST pilot. (default: None) -``` - -### Inputs - -The scipt take one main input which is coordinates, either direct in the command line or using an input csv file. - -#### Command line: Single Coordinate -Here the format can be either in Hours or decimal degrees: -* `"HH:MM:SS.ss +/-DD:MM:SS.ss"` -* `"DDD.ddd +/-DD.ddd"` -Note the space between the coodinates and the quotation marks. - -E.g. -``` -python find_sources.py "22:37:5.6000 +34:24:31.90" -``` -``` -python find_sources.py "339.2733333 34.4088611" -``` - -It's recommended to provide a source name using the option `--source-names`, e.g. -``` -python find_sources.py "22:37:5.6000 +34:24:31.90" --source-names "SN 2014C" -``` - - -#### Command line: Multiple Coordinates -Same format as above but now separate coodinates with `,`: -* `"HH:MM:SS.ss +/-DD:MM:SS.ss,HH:MM:SS.ss +/-DD:MM:SS.ss,HH:MM:SS.ss +/-DD:MM:SS.ss"` -* `"DDD.ddd +/-DD.ddd,DDD.ddd +/-DD.ddd,DDD.ddd +/-DD.ddd"` - -Note there is no space between the commas. +## Current Scripts +The currently available scripts are: -E.g. -``` -python find_sources.py "22:37:5.6000 +34:24:31.90,22:37:5.6000 -34:24:31.90,13:37:5.6000 -84:24:31.90" -``` -``` -python find_sources.py "339.2733333 34.4088611,154.2733333 -34.4088611,20.2733333 -54.4088611" -``` - -Source names can still be defined using the option `--source-names` with the same comma notation e.g. - -``` -python find_sources.py "22:37:5.6000 +34:24:31.90,22:37:5.6000 -34:24:31.90,13:37:5.6000 -84:24:31.90" --source-names "SN 2014C,SN 2012C,SN2019B" -``` - -#### Input CSV file -To crossmatch many coordinates it's recommended to use a csv. Instead of entering coordaintes enter the name of the csv. The `--source-names` options is not used with CSV files. - -E.g. -``` -python find_sources.py my_coords.csv -``` - -The columns `ra` and `dec` are required and can be in either of the formats shown in the command line options. `name` is also accepted and is recommended. E.g. 
-``` -ra,dec,name -123.45,-67.89,source name -``` - -See `input_example.csv`. - -### Examples - -Search for a match to one source and create a FITS postage stamp of 5 arcminutes across. Will place the output in `example_source`. - -``` -find_sources.py "22:37:5.6000 +34:24:31.90" --imsize 5.0 --source-names "SN 2014C" --out-folder example_source -``` - -To include a png output with selavy overlay: - -``` -find_sources.py "22:37:5.6000 +34:24:31.90" --imsize 5.0 --source-names "SN 2014C" --out-folder example_source --create-png --png-selavy-overlay -``` -Now search in Stokes V to a different directory and also include a kvis annotation file and an extra coodinate: -``` -find_sources.py "22:37:5.6000 +34:24:31.90,22:37:5.6000 +44:24:31.90" --imsize 5.0 --source-names "SN 2014C,SN 2019I" --out-folder example_source_stokesv_ --create-png --png-selavy-overlay --stokesv --ann -``` -Search through a csv of coordinates, make pngs, use zscale with a contrast of 0.2, create annotation and region files.: -``` -find_sources.py my_coords.csv --imsize 5.0 --out-folder example_source --create-png --png-selavy-overlay --png-use-zscale --png-zscale-contrast 0.2 --ann --reg -``` +* **find\_sources.py** - A tool to swiftly search RACS and VAST Pilot data at chosen coordinates. + - See [FINDSOURCES.md](FINDSOURCES.md) for full instructions. +* **get\_vast\_pilot\_dbx.py** - A script to allow simpler downloading of the VAST Pilot survey from Dropbox. + - See [VASTDROPBOX.md](VASTDROPBOX.md) for full instructions. diff --git a/VASTDROPBOX.md b/VASTDROPBOX.md new file mode 100644 index 00000000..fbe36793 --- /dev/null +++ b/VASTDROPBOX.md @@ -0,0 +1,136 @@ +# get\_vast\_pilot\_dbx.py + +This script allows for simple downloading of the VAST Pilot survey from Dropbox. + +Features: +* Generate a list of available files. +* State which Epochs are available. +* Download an entire Epoch. +* Download user requested files. 
+ +## Prerequisites + +**This script requires you to have a Dropbox App 'access token'.** You obtain one by making an 'app' on your Dropbox account and then generating an OAuth token for that app. + +This tutorial shows you how to obtain one: http://99rabbits.com/get-dropbox-access-token/. Make sure you select the `Full Dropbox` option in the access section. + +Otherwise the requirements installed from the main repo will cover all the Python needs. + +You also need to know the shared Dropbox URL of the Pilot survey and the password. + +## Usage +``` +usage: get_vast_pilot_dbx.py [-h] [--output OUTPUT] [--available-epochs] + [--available-files] + [--download-epoch DOWNLOAD_EPOCH] + [--files-list FILES_LIST] [--overwrite] [--debug] + [--dropbox-config DROPBOX_CONFIG] + [--write-template-dropbox-config] + +optional arguments: + -h, --help show this help message and exit + --output OUTPUT Name of the local output directory where files will be + saved (default: vast_dropbox) + --available-epochs Print out what Epochs are available. (default: False) + --available-files Print out a list of available files on the shared + folder. (default: False) + --download-epoch DOWNLOAD_EPOCH + Select to download an entire Epoch directory. Enter as + an integer. (default: 0) + --files-list FILES_LIST + Input of files to fetch. (default: None) + --overwrite Overwrite any files that already exist in the output + directory. (default: False) + --debug Set logging level to debug. (default: False) + --dropbox-config DROPBOX_CONFIG + Dropbox config file to be read in containing the + shared url, password and access token. A template can + be generated using --write-template-dropbox-config. + (default: dropbox.cfg) + --write-template-dropbox-config + Create a template dropbox config file. (default: + False) + +``` + +To run, the script needs a Dropbox configuration file, which by default is assumed to be named 'dropbox.cfg'. 
Create a text file in the following format and enter the respective values: +``` +[dropbox] +shared_url = ENTER_URL +password = ENTER_PASSWORD +access_token = ENTER_ACCESS_TOKEN +``` +There is no need to put quotes around the strings. A template can be generated by using: +``` +get_vast_pilot_dbx.py --write-template-dropbox-config +``` +Use the option `--dropbox-config` if your config file is named something different than the default. + +A log file will be saved for every run of the script. + +### Modes + +There are 4 different ways the script can be used: + +1. `--available-epochs` will only display the currently released epochs. Nothing will be downloaded. +2. `--available-files` will generate a complete list of all the files available. This is helpful in order to build your own list of files you wish to fetch. Nothing will be downloaded. +3. `--download-epoch` will download an entire Epoch directory of your choosing. +4. `--files-list` defines a text file that contains the files you wish to download. Help on this is below. + +Modes 3 and 4 both place results in an output directory. The name of the directory can be set with `--output`. + +Take note of the **overwrite** option. By default this is set to `False` such that it will skip files already present in the output directory. Using this option will download all files and overwrite any existing files if they are already present. + +#### User Files List +When supplying a list of files it needs to follow the directory structure of the Dropbox. It also needs to explicitly state the files - i.e. you **cannot use wildcards** (sorry, it's a limitation of using Dropbox this way). 
+ +For example if I wanted to download a set of STOKES I COMBINED images from EPOCH01, the file would be: +``` +/EPOCH01/COMBINED/STOKESI_IMAGES/VAST_0918+00A.EPOCH01.I.fits +/EPOCH01/COMBINED/STOKESI_IMAGES/VAST_1739-25A.EPOCH01.I.fits +/EPOCH01/COMBINED/STOKESI_IMAGES/VAST_1753-18A.EPOCH01.I.fits +/EPOCH01/COMBINED/STOKESI_IMAGES/VAST_0943+00A.EPOCH01.I.fits +/EPOCH01/COMBINED/STOKESI_IMAGES/VAST_0216+00A.EPOCH01.I.fits +/EPOCH01/COMBINED/STOKESI_IMAGES/VAST_2143-06A.EPOCH01.I.fits +/EPOCH01/COMBINED/STOKESI_IMAGES/VAST_2208-06A.EPOCH01.I.fits + +``` +Note the leading `/` which is also needed. + +I recommend you run `python get_vast_pilot_dbx.py --available-files` and use this output to build your request. + +### Examples + +#### Obtaining a List of all available files +``` +python get_vast_pilot_dbx.py --available-files +``` +This will generate a text file containing a list of available files. It will be named with a timestamp. + +#### Downloading an entire epoch +Using epoch 01 as an example: +``` +python get_vast_pilot_dbx.py --download-epoch 1 --output VAST_DOWNLOAD +``` +This will place the EPOCH01 directory in `VAST_DOWNLOAD`. + +#### Downloading a user selected set of files + +1. Create the text file containing the files, e.g. `to_download.txt`: +``` +/EPOCH01/COMBINED/STOKESI_IMAGES/VAST_0918+00A.EPOCH01.I.fits +/EPOCH01/COMBINED/STOKESI_IMAGES/VAST_1739-25A.EPOCH01.I.fits +/EPOCH01/COMBINED/STOKESI_IMAGES/VAST_1753-18A.EPOCH01.I.fits +/EPOCH01/COMBINED/STOKESI_IMAGES/VAST_0943+00A.EPOCH01.I.fits +/EPOCH01/COMBINED/STOKESI_IMAGES/VAST_0216+00A.EPOCH01.I.fits +/EPOCH01/COMBINED/STOKESI_IMAGES/VAST_2143-06A.EPOCH01.I.fits +/EPOCH01/COMBINED/STOKESI_IMAGES/VAST_2208-06A.EPOCH01.I.fits + +``` +2. Then run with: +``` +python get_vast_pilot_dbx.py --files-list to_download.txt --output VAST_DOWNLOAD +``` +This will place these files in `VAST_DOWNLOAD`. The directory structure will be mimicked. 
+
+
diff --git a/requirements.txt b/requirements.txt
index 46c6d5fc..e8ed918b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,5 @@ numpy>=1.16.4
 scipy>=1.3.1
 astropy>=3.2.1
 matplotlib>=3.1.1
-colorlog>=4.0.2
\ No newline at end of file
+colorlog>=4.0.2
+dropbox>=9.4.0
diff --git a/vasttools/get_vast_pilot_dbx.py b/vasttools/get_vast_pilot_dbx.py
new file mode 100755
index 00000000..a325f1c2
--- /dev/null
+++ b/vasttools/get_vast_pilot_dbx.py
@@ -0,0 +1,322 @@
+#!/usr/bin/env python
+
+import argparse
+import dropbox
+import os
+import sys
+import datetime
+import configparser
+import numpy as np
+import itertools
+
+import logging
+import logging.handlers
+import logging.config
+
+try:
+    import colorlog
+    use_colorlog=True
+except ImportError:
+    use_colorlog=False
+
+def recursive_build_files(base_file_list, dbx, preappend=""):
+    '''
+    Very annoyingly, recursive file lists do not work on shared folders.
+    This function is to fetch every single file available by iterating over all folders found
+    to build up a unique file list. Uses the module-level `logger`, `spinner` and `shared_link`.
+
+    :param base_file_list: folder listing (object with an `.entries` list) of the folder to start from
+    :type base_file_list: dropbox.files.ListFolderResult
+    :param dbx: Dropbox client used to list every sub-folder that is found
+    :type dbx: dropbox.Dropbox
+    :param preappend: folder name prefixed to every returned path, defaults to an empty str
+    :type preappend: str, optional
+
+    :returns: list of all file paths and list of all folder paths found
+    :rtype: list, list
+    '''
+
+    folders=[]
+    searched_folders=[]
+    files=[]
+    for i in base_file_list.entries:
+        if isinstance(i, dropbox.files.FolderMetadata):
+            if preappend=="":
+                folders.append("/{}".format(i.name))
+            else:
+                folders.append("/{}/{}".format(preappend, i.name))
+        else:
+            if preappend=="":
+                files.append("/{}".format(i.name))
+            else:
+                files.append("/{}/{}".format(preappend, i.name))
+
+    while folders != searched_folders:
+        for i in folders:
+            if logger.level != logging.DEBUG:
+                sys.stdout.write(next(spinner))   # write the next character
+                sys.stdout.flush()                # flush stdout buffer (actual character display)
+                sys.stdout.write('\b')
+            if i not in searched_folders:
+                these_files = dbx.files_list_folder("/{}".format(i), shared_link=shared_link)
+                for j in these_files.entries:
+                    if isinstance(j, dropbox.files.FolderMetadata):
+                        if preappend=="" or i.startswith("/{}".format(preappend)):
+                            folders.append("{}/{}".format(i, j.name))
+                        else:
+                            folders.append("/{}/{}/{}".format(preappend, i, j.name))
+                    else:
+                        if preappend=="" or i.startswith("/{}".format(preappend)):
+                            files.append("{}/{}".format(i, j.name))
+                        else:
+                            files.append("/{}/{}/{}".format(preappend, i, j.name))
+                searched_folders.append(i)
+                logger.debug("Searched {}".format(i))
+                logger.debug("Folders: {}".format(folders))
+                logger.debug("Searched Folders: {}".format(searched_folders))
+    sys.stdout.flush()                # flush stdout buffer (actual character display)
+    logger.info("Finished!")
+    return files, folders
+
+def download_files(files_list, pwd, output_dir, dbx, shared_url, password, overwrite=False):
+    '''
+    Iterate over a list of files and download them from the dropbox folder
+
+    :param files_list: Dropbox paths to download; the first character of each (the leading '/') is dropped to build the local path
+    :type files_list: list
+    :param pwd: base directory that `output_dir` is joined onto
+    :type pwd: str
+    :param output_dir: directory (relative to `pwd`) the files are saved under
+    :type output_dir: str
+    :param dbx: Dropbox client used to perform the downloads
+    :type dbx: dropbox.Dropbox
+    :param shared_url: URL of the shared Dropbox link
+    :type shared_url: str
+    :param password: password of the shared Dropbox link
+    :type password: str
+    :param overwrite: whether to overwrite existing files, defaults to False
+    :type overwrite: bool, optional
+    '''
+
+    for vast_file in files_list:
+        download_path = os.path.join(pwd, output_dir, vast_file[1:])
+        if not overwrite:
+            if os.path.isfile(download_path):
+                logger.error("{} already exists and overwrite is set to {}.".format(download_path, overwrite))
+                logger.info("Skipping file.")
+                continue
+        dropbox_path = "{}".format(vast_file)
+        logger.debug("Download path: {}".format(download_path))
+        logger.info("Downloading {}...".format(dropbox_path))
+        dbx.sharing_get_shared_link_file_to_file(download_path, shared_url, path=dropbox_path, link_password=password)
+
+
+def check_dir(directory):
+    '''
+    Wrapper for os.path.isdir()
+
+    :param directory: path to directory we're checking the existence of
+    :type directory: str
+
+    :returns: True if the specified path is an existing directory, False otherwise
+    :rtype: bool
+    '''
+
+    return os.path.isdir(directory)
+
+def check_file(file_to_check):
+    '''
+    Wrapper for os.path.isfile()
+
+    :param file_to_check: path to file we're checking the existence of
+    :type file_to_check: str
+
+    :returns: True if the specified path is an existing file, False otherwise
+    :rtype: bool
+    '''
+
+    return os.path.isfile(file_to_check)
+
+parser=argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+parser.add_argument('--output', type=str, help='Name of the local output directory where files will be saved', default="vast_dropbox")
+parser.add_argument('--available-epochs', action="store_true", help='Print out what Epochs are available.')
+parser.add_argument('--available-files', action="store_true", help='Print out a list of available files on the shared folder.')
+parser.add_argument('--download-epoch', type=int, help='Select to download an entire Epoch directory. Enter as an integer.', default=0)
+parser.add_argument('--files-list', type=str, help='Input of files to fetch.', default=None)
+parser.add_argument('--overwrite', action="store_true", help='Overwrite any files that already exist in the output directory.')
+parser.add_argument('--debug', action="store_true", help='Set logging level to debug.')
+parser.add_argument('--dropbox-config', type=str, help='Dropbox config file to be read in containing the shared url, password and access token. A template \
+can be generated using --write-template-dropbox-config.', default="dropbox.cfg")
+parser.add_argument('--write-template-dropbox-config', action="store_true", help='Create a template dropbox config file.')
+
+args=parser.parse_args()
+
+now = datetime.datetime.now()
+now_str = now.strftime("%Y%m%d_%H:%M:%S")
+
+logger = logging.getLogger()
+s = logging.StreamHandler()
+logformat='[%(asctime)s] - %(levelname)s - %(message)s'
+
+if use_colorlog:
+    formatter = colorlog.ColoredFormatter(
+        "%(log_color)s[%(asctime)s] - %(levelname)s - %(blue)s%(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+        reset=True,
+        log_colors={
+            'DEBUG': 'cyan',
+            'INFO': 'green',
+            'WARNING': 'yellow',
+            'ERROR': 'red',
+            'CRITICAL': 'red,bg_white',
+        },
+        secondary_log_colors={},
+        style='%'
+    )
+else:
+    formatter = logging.Formatter(logformat, datefmt="%Y-%m-%d %H:%M:%S")
+
+s.setFormatter(formatter)
+logger.addHandler(s)
+
+logfilename = "get_vast_pilot_dbx_{}.log".format(now_str)
+fileHandler = logging.FileHandler(logfilename)
+fileHandler.setFormatter(logging.Formatter(logformat, datefmt="%Y-%m-%d %H:%M:%S"))
+logger.addHandler(fileHandler)
+
+if args.debug:
+    logger.setLevel(logging.DEBUG)
+else:
+    logger.setLevel(logging.INFO)
+
+logging.getLogger("dropbox").setLevel(logging.WARNING)
+
+if args.write_template_dropbox_config:
+    config_file = "dropbox.cfg"
+    with open(config_file, "w") as f:
+        f.write("[dropbox]\n")
+        f.write("shared_url = ENTER_URL\n")
+        f.write("password = ENTER_PASSWORD\n")
+        f.write("access_token = ENTER_ACCESS_TOKEN\n")
+
+    logger.info("Written an example dropbox config file to '{}'.".format(config_file))
+    sys.exit()
+
+if not check_file(args.dropbox_config):
+    logger.critical("Cannot find dropbox config file '{}'!".format(args.dropbox_config))
+    logger.info("A template dropbox file can be generated using 'python get_vast_pilot_dbx.py --write-template-dropbox-config'")
+    sys.exit(1)
+
+config = configparser.ConfigParser()
+config.read(args.dropbox_config)
+
+shared_url = config["dropbox"]["shared_url"]
+password = config["dropbox"]["password"]
+access_token = config["dropbox"]["access_token"]
+
+logger.debug("Shared URL: {}".format(shared_url))
+logger.debug("Password: {}".format(password != ""))
+
+output_dir = args.output
+
+# check dir - only the two download modes write into the output directory
+if not args.available_epochs and not args.available_files:
+    if check_dir(output_dir):
+        logger.warning("Output directory '{}' already exists!".format(output_dir))
+        logger.warning("Files may get overwritten!")
+    else:
+        os.mkdir(output_dir)
+
+
+dbx = dropbox.Dropbox(access_token)
+
+shared_link = dropbox.files.SharedLink(url=shared_url, password=password)
+
+base_file_list = dbx.files_list_folder("", shared_link=shared_link)
+
+spinner = itertools.cycle(['-', '/', '|', '\\'])
+
+if args.available_epochs:
+    logger.info("The following epochs are available:")
+    for i in base_file_list.entries:
+        if isinstance(i, dropbox.files.FolderMetadata) and "EPOCH" in i.name:
+            logger.info(i.name)
+
+elif args.available_files:
+    logger.info("Gathering a list of files - this will take approximately 4 minutes per epoch.")
+    files_list, folders_list = recursive_build_files(base_file_list, dbx)
+    logger.info("Found {} files.".format(len(files_list)))
+    vast_list_file_name = "vast_dbx_file_list_{}.txt".format(now_str)
+    with open(vast_list_file_name, "w") as f:
+        f.write("# File list on VAST Pilot survey dropbox as of {}\n".format(now))
+        f.writelines(i + "\n" for i in files_list)
+    logger.info("All available files written to {}".format(vast_list_file_name))
+
+elif args.download_epoch != 0:
+    epochs = []
+    for i in base_file_list.entries:
+        if isinstance(i, dropbox.files.FolderMetadata) and "EPOCH" in i.name:
+            epochs.append(int(i.name.split('EPOCH')[-1]))
+    if args.download_epoch not in epochs:
+        logger.error("EPOCH{:02d} has not yet been released!".format(args.download_epoch))
+        sys.exit(1)
+    else:
+        epoch_string = "EPOCH{:02d}".format(args.download_epoch)
+        epoch_file_list = dbx.files_list_folder("/{}".format(epoch_string), shared_link=shared_link)
+        logger.info("Gathering {} files to download, please wait...".format(epoch_string))
+        files_list, folders_list = recursive_build_files(epoch_file_list, dbx, preappend=epoch_string)
+        logger.info("{} files to download".format(len(files_list)))
+
+        for folder in folders_list:
+            os.makedirs(os.path.join(output_dir, folder[1:]), exist_ok=True)
+        logger.info("Downloading files for {}...".format(epoch_string))
+        download_files(files_list, os.getcwd(), output_dir, dbx, shared_url, password, overwrite=args.overwrite)
+
+elif args.files_list is not None:
+    if not check_file(args.files_list):
+        logger.error("Supplied file '{}' not found!".format(args.files_list))
+        sys.exit(1)
+    with open(args.files_list, 'r') as f:
+        userlines = f.readlines()
+
+    # check files start with /, ignore # comment lines and blank lines
+    files_to_download = []
+
+    for i in userlines:
+        if i.startswith("#") or not i.strip():
+            continue
+        else:
+            if i.startswith("/"):
+                files_to_download.append(i.strip())
+            else:
+                files_to_download.append("/{}".format(i.strip()))
+
+    dirs_to_create = np.unique(["/".join(i.strip().split("/")[1:-1]) for i in files_to_download])
+
+    for i in dirs_to_create:
+        if i=="":
+            continue
+        os.makedirs(os.path.join(output_dir, i), exist_ok=True)
+
+    logger.info("Downloading {} files from '{}'...".format(len(files_to_download), args.files_list))
+    download_files(files_to_download, os.getcwd(), output_dir, dbx, shared_url, password, overwrite=args.overwrite)
+
+else:
+    logger.info("Nothing to be done!")
+
+end = datetime.datetime.now()
+
+runtime = end-now
+
+logger.info("Ran for {:.1f} minutes.".format(runtime.total_seconds()/60.))
+
+logger.info("Log file written to {}".format(logfilename))
+
+logger.info("All done!")
+
+
+
+
+
+