From 947cdd01882ecf435a4307a5a8f370a82ea2e604 Mon Sep 17 00:00:00 2001 From: Phaeton <808865+Phaeton@users.noreply.github.com> Date: Tue, 23 Jan 2024 09:09:40 -0500 Subject: [PATCH] Add sort actions to the main database, reference files, and derivative files. (#451) * Add sorting to create_db_derivatives action. * black linted --- scripts/create_db_derivatives.py | 48 ++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/scripts/create_db_derivatives.py b/scripts/create_db_derivatives.py index a8214fa7..506f25ed 100644 --- a/scripts/create_db_derivatives.py +++ b/scripts/create_db_derivatives.py @@ -1,7 +1,9 @@ """This script creates (derivative) category and images CSV database files from the main 'plane-alert-db.csv' database file. The categories are created based on the 'CMPG' column, while images are added using the 'plane_images.csv' reference file. It also -creates the 'plane-alert-ukraine-images.csv' database file. +creates the 'plane-alert-ukraine-images.csv' database file. Lastly, based on the +'plane-alert-db.csv' database file, missing records are added and/or extra records are +removed from the 'plane_images.csv' reference file. """ import logging @@ -14,12 +16,30 @@ if __name__ == "__main__": logging.info("Reading the main csv file...") - df = pd.read_csv("plane-alert-db.csv") - logging.info("Main csv file read successfully.") + unsort_df = pd.read_csv("plane-alert-db.csv") + df = unsort_df.sort_values(by=["$ICAO"], ascending=True) + df.to_csv( + "plane-alert-db.csv", + mode="wb", + index=False, + header=True, + encoding="utf8", + lineterminator="\n", + ) + logging.info("Main csv file read and sorted successfully.") logging.info("Reading the images reference file...") - images_df = pd.read_csv("plane_images.csv") - logging.info("Images reference file read successfully.") + unsort_images_df = pd.read_csv("plane_images.csv") + images_df = unsort_images_df.sort_values(by=["$ICAO"], ascending=True) + images_df.to_csv( + "plane_images.csv", + mode="wb", + index=False, + header=True, + encoding="utf8", + lineterminator="\n", + ) + logging.info("Images reference file read and sorted successfully.") logging.info("Creating the category and category images CSV files...") for category in df["#CMPG"].unique(): @@ -49,8 +69,19 @@ ) logging.info("Category and category images CSV files created successfully.") - logging.info("Creating the ukraine database images CSV file...") - ukraine_df = pd.read_csv("plane-alert-ukraine.csv") + logging.info("Reading the Ukraine csv file...") + unsort_ukraine_df = pd.read_csv("plane-alert-ukraine.csv") + ukraine_df = unsort_ukraine_df.sort_values(by=["$ICAO"], ascending=True) + ukraine_df.to_csv( + "plane-alert-ukraine.csv", + mode="wb", + index=False, + header=True, + encoding="utf8", + lineterminator="\n", + ) + logging.info("Ukraine csv file read and sorted successfully.") + logging.info("Creating the Ukraine database images CSV file...") ukraine_df_images = pd.merge(ukraine_df, images_df, how="left", on="$ICAO") ukraine_df_images.to_csv( "plane-alert-ukraine-images.csv", @@ -98,7 +129,8 @@ how="outer", on="$ICAO", ) - plane_images_df.to_csv( + sort_plane_images_df = plane_images_df.sort_values(by=["$ICAO"], ascending=True) + sort_plane_images_df.to_csv( "plane_images.csv", mode="wb", index=False,