Guvcat (#216)
* add guvcat_load.py, guvcat_pkey_indexes.sql

* flake8: guvcat_load.py
astronomygupta authored Sep 6, 2023
1 parent 5f96ef9 commit e219f99
Showing 2 changed files with 98 additions and 0 deletions.
89 changes: 89 additions & 0 deletions schema/sdss5db/catalogdb/guvcat/fov055/guvcat_load.py
@@ -0,0 +1,89 @@
# Program: guvcat_load.py
# Aim: load GUVCat csv files into the PostgreSQL sdss5db database.
# We use this on pipelines.sdss.org instead of guvcat_load
# since pipelines does not have the parallel command.
#
# The program checks that it does not reload csv files which already
# have a corresponding .csv.out file.
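#
# How it works: for each csv file the program writes a small psql script of
# the form below (a sketch with an illustrative file name, not output from a
# real run):
#
#   \o <csvout_dir>GUVCat_xxx.csv.out
#   \copy catalogdb.guvcat from '<csv_dir>GUVCat_xxx.csv'  with csv header;
#   \o
#   \q
#
# It runs the script with "psql -d sdss5db -a -f <script>" and then compares
# the number of data lines in the csv file (wc -l minus the header line)
# against the COPY row count that psql writes into the .csv.out file.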

import glob
import os.path


DEBUG = False

# Note that csv_dir and csvout_dir must end with /

csv_dir = "/uufs/chpc.utah.edu/common/home/sdss50/sdsswork/target/catalogs/guvcat/fov055/src/"  # noqa: E501
csvout_dir = "/uufs/chpc.utah.edu/common/home/sdss50/sdsswork/target/catalogs/guvcat/fov055/src/csvout_pipelines/"  # noqa: E501

list_of_csv_files = glob.glob(csv_dir + "GUVCat*.csv")
list_of_csv_files.sort()

fout = open(csvout_dir + "load.csv.out", "a")

if DEBUG is True:
    list_of_csv_files = [csv_dir + "GUVCat_AIS_glat00_05N_fov055_wExtObjFlag.csv"]  # noqa: E501

for i in range(len(list_of_csv_files)):
    full_csv_file = list_of_csv_files[i]
    csv_file = os.path.basename(full_csv_file)
    csvout_file = csv_file.replace('.csv', '.csv.out')
    csvsql_file = csv_file.replace('.csv', '.csv.sql')

    # both csvout_file and csvsql_file are put in the csvout_dir directory
    full_csvout_file = csvout_dir + csvout_file
    full_csvsql_file = csvout_dir + csvsql_file
    # if the csv.out file exists then skip this csv file and go to the next one
    if os.path.isfile(full_csvout_file):
        print("skipping loading csv file since csv.out file already exists:",
              csvout_file)
        print("skipping loading csv file since csv.out file already exists:",
              csvout_file,
              file=fout, flush=True)
        continue

    fpgscript = open(full_csvsql_file, "w")
    print("\\o " + full_csvout_file, file=fpgscript)
    print("\\copy catalogdb.guvcat ", file=fpgscript, end='')
    print("from '" + full_csv_file + "' ", file=fpgscript, end='')
    print(" with csv header; ", file=fpgscript)
    print("\\o", file=fpgscript)
    print("\\q", file=fpgscript)
    fpgscript.close()

    print("load start:", csv_file)
    print("load start:", csv_file, file=fout, flush=True)

    pgcopy_output = os.popen("psql -d sdss5db " +
                             " -a -f " + full_csvsql_file).read()

    wc_output = os.popen("wc -l " + full_csv_file).read()
    num_lines_str, file_name = wc_output.split()
    num_lines = int(num_lines_str)
    # do not count the header line
    # so reduce num_lines by one
    num_lines = num_lines - 1
    print(csv_file, ":contains:", num_lines)
    print(csv_file, ":contains:", num_lines, file=fout, flush=True)

    # the first line of the csv.out file looks like "COPY <row count>"
    fcsvout = open(full_csvout_file, "r")
    line = fcsvout.readline()
    copytext, num_rows_loaded_str = line.split()
    num_rows_loaded = int(num_rows_loaded_str)
    print(csvout_file, ":loaded:", num_rows_loaded)
    print(csvout_file, ":loaded:", num_rows_loaded, file=fout, flush=True)
    fcsvout.close()

    if num_lines != num_rows_loaded:
        print("load error:num_lines!=num_rows_loaded", csv_file)
        print("load error:num_lines!=num_rows_loaded",
              csv_file, file=fout, flush=True)

    print("load end:", csv_file)
    print("load end:", csv_file, file=fout, flush=True)

print("loaded all csv files")
print("loaded all csv files", file=fout, flush=True)

fout.close()
9 changes: 9 additions & 0 deletions guvcat_pkey_indexes.sql
@@ -0,0 +1,9 @@
alter table catalogdb.guvcat add primary key (objid);

create index on catalogdb.guvcat(q3c_ang2ipix(ra, "dec"));
create index on catalogdb.guvcat((fuv_mag - nuv_mag));
create index on catalogdb.guvcat(fuv_mag);
create index on catalogdb.guvcat(fuv_magerr);
create index on catalogdb.guvcat(nuv_mag);
create index on catalogdb.guvcat(nuv_magerr);
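
The q3c index above is what makes positional (cone) searches on
catalogdb.guvcat fast. A minimal sketch of such a query, assuming the
standard q3c extension is installed; the centre coordinates and search
radius (in degrees) are illustrative:

-- cone search using the q3c_ang2ipix(ra, "dec") index created above
select objid, ra, "dec", fuv_mag, nuv_mag
from catalogdb.guvcat
where q3c_radial_query(ra, "dec", 150.0, 2.0, 0.1);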
