Commit

import modules comments added
gaaniruddha committed Dec 19, 2024
1 parent e1b9c73 commit a25f845
Showing 1 changed file with 70 additions and 49 deletions.
119 changes: 70 additions & 49 deletions dlg-code/wallaby_hires.py
@@ -3,25 +3,46 @@
Description: All functions needed for the WALLABY hires pipeline to process the HIPASS sources
"""

# Importing required modules

# Used here for functions to interact with the operating system, such as os.path, os.makedirs, os.getcwd, etc.
import os

# Access and interact with the Python runtime environment
import sys

# To parse and manipulate JSON data
import json

# To fetch or handle URLs and their contents
import urllib
import urllib.request

# To support asynchronous programming.
import asyncio

# To parse command-line arguments
import argparse

# Core package for astronomy & astrophysics
import astropy
from astropy.table import Table

# To work with config files
import configparser

# Interface with TAP (Table Access Protocol) services for querying databases
from astroquery.utils.tap.core import TapPlus

# Interface with CASDA, the CSIRO ASKAP Science Data Archive
from astroquery.casda import Casda

# To execute tasks asynchronously with threads or processes
import concurrent.futures

# Used here to measure how long a given code segment takes to run
import time

# Used here to work with CSV files
import csv
@@ -34,9 +55,11 @@

# To make HTTP requests over the web
import requests

# Used here to create a copy of a given dictionary
import copy

# Provides a flexible framework for logging messages
import logging
logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)
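
# NOTE (illustrative sketch, not part of the pipeline code): the TAP/CASDA
# imports above are typically combined as below — run an ADQL query via
# TapPlus, log in to CASDA, then stage the matching files for download.
# The TAP URL and username are placeholders, assuming a recent astroquery
# release.
#
# tap = TapPlus(url="https://casda.csiro.au/casda_vo_tools/tap")
# job = tap.launch_job("SELECT TOP 5 * FROM ivoa.obscore")
# table = job.get_results()            # astropy Table of matching observations
#
# casda = Casda()
# casda.login(username="user@example.com")          # prompts for the CASDA password
# url_list = casda.stage_data(table, verbose=True)  # URLs ready for download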

@@ -811,26 +834,26 @@ def process_and_download_data_same_folder(credentials, input_csv, processed_cata

# NOTE: toggle this section on/off when testing on a laptop

# Stage data for download
url_list = casda.stage_data(res, verbose=True)
print(f"Staging data URLs for {name}")

# Download files concurrently in the current working directory
file_list = []
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
futures = [
executor.submit(download_file, url=url, check_exists=True, output='.', timeout=timeout_seconds)
for url in url_list if not url.endswith('checksum')
]

for future in concurrent.futures.as_completed(futures):
file_list.append(future.result())

# Untar files in the current working directory
print(f"Untarring files for: {name}")
for file in file_list:
if file.endswith('.tar') and tarfile.is_tarfile(file):
untar_file(file, '.')
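
# NOTE (illustrative sketch): untar_file is defined elsewhere in this file; a
# minimal equivalent, assuming it simply extracts a tar archive into the given
# directory using the tarfile module already available here, would be:
#
# def untar_file(tar_path, output_dir):
#     with tarfile.open(tar_path) as tar:
#         tar.extractall(path=output_dir)  # extract every member into output_dir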

output_df = pd.DataFrame(output_data, columns=['Name', 'RA', 'DEC', 'Vsys'])
output_csv = os.path.join('.', 'hipass_ms_file_details.csv')
@@ -973,7 +996,6 @@ def mosaic():
print(f"Output file created: {filename}")
print(f"Output file created: {weights_filename}")


# Code for the 'download evaluation files' workflow

# HIPASS Query with filename pattern
@@ -1232,7 +1254,7 @@ def process_and_download(credentials, input_csv, processed_catalogue, timeout_se
for idx, url in enumerate(download_url_list, start=1):
print(f"- link {idx}: {url}")

# NOTE: toggle this section on/off when testing on a laptop (downloads evaluation files)
# Download the required files
# Define the download directory as the current working directory
download_dir = os.getcwd()
@@ -1267,7 +1289,7 @@ def process_and_download(credentials, input_csv, processed_catalogue, timeout_se
print(f"Extracted '{tar_file}' to '{tar_file_folder_name}'")

# Get RA, DEC, and Vsys from the query
res = tap_query_RA_DEC_VSYS(name)

# Assuming res returns a DataFrame with the required values, extract them
if not res or len(res) == 0:
@@ -1309,29 +1331,28 @@ def process_and_download(credentials, input_csv, processed_catalogue, timeout_se
print(f"File {new_filename} added to i/p for pipeline part B")
output_data.append([new_filename, f"{ra_h}: {ra_m}: {ra_s:.2f}", f"{dec_d}: {dec_m}: {dec_s:.2f}", vsys])

# NOTE: toggle this section on/off when testing on a laptop (downloads .ms files)

# Stage data for download
url_list = casda.stage_data(res, verbose=True)
print(f"Staging data URLs for {name}")

# Download files concurrently in the current working directory
file_list = []
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
futures = [
executor.submit(download_file, url=url, check_exists=True, output='.', timeout=timeout_seconds)
for url in url_list if not url.endswith('checksum')
]

for future in concurrent.futures.as_completed(futures):
file_list.append(future.result())

# Untar files in the current working directory
print(f"Untarring files for: {name}")
for file in file_list:
if file.endswith('.tar') and tarfile.is_tarfile(file):
untar_file(file, '.')

# Creates a DataFrame with filename, RA, DEC and systemic velocity (Vsys)
output_df = pd.DataFrame(output_data, columns=['Name', 'RA', 'DEC', 'Vsys'])
