-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add guvcat_load.py, guvcat_pkey_indexes.sql * flake8: guvcat_load.py
- Loading branch information
1 parent
5f96ef9
commit e219f99
Showing
2 changed files
with
98 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
# Program: guvcat_load.py | ||
# Aim: load guvcat csv files into the postgreSQL sdss5db database. | ||
# We use this on pipelines.sdss.org instead of guvcat_load | ||
# since pipelines does not have the command parallel. | ||
# | ||
# The program checks so that it does not reload csv files which already | ||
# have a csv.load.out file. | ||
|
||
import glob | ||
import os.path | ||
|
||
|
||
DEBUG = False | ||
|
||
# Note that csv_dir and csvout_dir must end with / | ||
|
||
csv_dir = "/uufs/chpc.utah.edu/common/home/sdss50/sdsswork/target/catalogs/guvcat/fov055/src/" # noqa E501 | ||
csvout_dir = "/uufs/chpc.utah.edu/common/home/sdss50/sdsswork/target/catalogs/guvcat/fov055/src/csvout_pipelines/" # noqa E501 | ||
|
||
list_of_csv_files = glob.glob(csv_dir + "GUVCat*.csv") | ||
list_of_csv_files.sort() | ||
|
||
fout = open(csvout_dir + "load.csv.out", "a") | ||
|
||
if DEBUG is True: | ||
list_of_csv_files = [csv_dir + "GUVCat_AIS_glat00_05N_fov055_wExtObjFlag.csv"] # noqa E501 | ||
|
||
for i in range(len(list_of_csv_files)): | ||
full_csv_file = list_of_csv_files[i] | ||
csv_file = os.path.basename(full_csv_file) | ||
csvout_file = csv_file.replace('.csv', '.csv.out') | ||
csvsql_file = csv_file.replace('.csv', '.csv.sql') | ||
|
||
# both csvout_file and csvsql_file are put in csvout_dir directory | ||
full_csvout_file = csvout_dir + csvout_file | ||
full_csvsql_file = csvout_dir + csvsql_file | ||
# if csv file exists then skip this csv file and goto next csv file | ||
if os.path.isfile(full_csvout_file): | ||
print("skipping loading csv file since csv.out file already exists:", | ||
csvout_file) | ||
print("skipping loading csv file since csv.out file already exists:", | ||
csvout_file, | ||
file=fout, flush=True) | ||
continue | ||
|
||
fpgscript = open(full_csvsql_file, "w") | ||
print("\\o " + full_csvout_file, file=fpgscript) | ||
print("\\copy catalogdb.guvcat ", file=fpgscript, end='') | ||
print("from '" + full_csv_file + "' ", file=fpgscript, end='') | ||
print(" with csv header; ", file=fpgscript) | ||
print("\\o", file=fpgscript) | ||
print("\\q", file=fpgscript) | ||
fpgscript.close() | ||
|
||
print("load start:", csv_file) | ||
print("load start:", csv_file, file=fout, flush=True) | ||
|
||
pgcopy_output = os.popen("psql -d sdss5db " + | ||
" -a -f " + full_csvsql_file).read() | ||
|
||
wc_output = os.popen("wc -l " + full_csv_file).read() | ||
num_lines_str, file_name = wc_output.split() | ||
num_lines = int(num_lines_str) | ||
# do not count the header line | ||
# so reduce num_lines by one | ||
num_lines = num_lines - 1 | ||
print(csv_file, ":contains:", num_lines) | ||
print(csv_file, ":contains:", num_lines, file=fout, flush=True) | ||
|
||
fcsvout = open(full_csvout_file, "r") | ||
line = fcsvout.readline() | ||
copytext, num_rows_loaded_str = line.split() | ||
num_rows_loaded = int(num_rows_loaded_str) | ||
print(csvout_file, ":loaded:", num_rows_loaded) | ||
print(csvout_file, ":loaded:", num_rows_loaded, file=fout, flush=True) | ||
fcsvout.close() | ||
|
||
if (num_lines != num_rows_loaded): | ||
print("load error:num_lines!=num_rows_loaded", csv_file) | ||
print("load error:num_lines!=num_rows_loaded", | ||
csv_file, file=fout, flush=True) | ||
|
||
print("load end:", csv_file) | ||
print("load end:", csv_file, file=fout, flush=True) | ||
|
||
print("loaded all csv files") | ||
print("loaded all csv files", file=fout, flush=True) | ||
|
||
fout.close() |
9 changes: 9 additions & 0 deletions
9
schema/sdss5db/catalogdb/guvcat/fov055/guvcat_pkey_indexes.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
-- Primary key and indexes for catalogdb.guvcat (presumably the GALEX
-- UV source catalog -- confirm), applied after the bulk csv load so the
-- load itself is not slowed down by index maintenance.

alter table catalogdb.guvcat add primary key (objid);

-- Spatial index using the q3c extension: q3c_ang2ipix maps (ra, dec)
-- to the pixel number q3c spatial queries search on.
-- "dec" is quoted because it is a reserved word in SQL.
create index on catalogdb.guvcat(q3c_ang2ipix(ra, "dec"));
-- Expression index on the FUV - NUV color (extra parentheses are
-- required by postgres for an expression index).
create index on catalogdb.guvcat((fuv_mag - nuv_mag));
-- Plain single-column indexes on the magnitudes and their errors.
create index on catalogdb.guvcat(fuv_mag);
create index on catalogdb.guvcat(fuv_magerr);
create index on catalogdb.guvcat(nuv_mag);
create index on catalogdb.guvcat(nuv_magerr);
|