diff --git a/.github/workflows/check_categories.yaml b/.github/workflows/check_categories.yaml new file mode 100644 index 00000000..8b0817f8 --- /dev/null +++ b/.github/workflows/check_categories.yaml @@ -0,0 +1,25 @@ +name: Check categories + +on: + pull_request: + paths: + - "plane-alert-db.csv" + - "plane-alert-pia.csv" + - "plane-alert-ukraine.csv" + - "plane_images.csv" + +jobs: + checkCategories: + runs-on: ubuntu-latest + name: Check if main databases have valid Categories + steps: + - name: Checkout + uses: actions/checkout@v4.1.7 + - uses: actions/setup-python@v5.1.1 + with: + python-version: "3.10" + cache: "pip" + - run: pip install -r ./scripts/requirements.txt + + - name: Run main database checks + run: python ./scripts/check_categories.py diff --git a/.github/workflows/create_db_derivatives.yaml b/.github/workflows/create_db_derivatives.yaml index 31448e0c..0e309d75 100644 --- a/.github/workflows/create_db_derivatives.yaml +++ b/.github/workflows/create_db_derivatives.yaml @@ -31,6 +31,9 @@ jobs: - name: Create category and images derivative CSV files run: python ./scripts/create_db_derivatives.py + - name: Create valid category listing + run: python ./scripts/export_categories.py + - name: Update README.md to include changes run: python ./scripts/update_readme.py diff --git a/plane-alert-categories.csv b/plane-alert-categories.csv new file mode 100644 index 00000000..3a00faae --- /dev/null +++ b/plane-alert-categories.csv @@ -0,0 +1,52 @@ +Category +Other Air Forces +Toy Soldiers +Flying Doctors +Dictator Alert +Governments +Ptolemy would be proud +Police Forces +Dogs with Jobs +Quango +Jump Johnny Jump +Aerial Firefighter +Climate Crisis +Jesus he Knows me +Big Hello +Other Navies +United States Navy +Coastguard +Bizjets +Da Comrade +Gunship +As Seen on TV +Oligarch +Oxcart +Distinctive +Joe Cool +Don't you know who I am? +Historic +You came here in that thing? 
"""Script to flag new/invalid categories.

Collects the unique ``Category`` values used in the main and Ukraine
databases and compares them against the exported listing in
``plane-alert-categories.csv``. The process exits with status 1 when a
category is used that is not present in that listing.
"""

import logging
import sys

import pandas as pd

logging.basicConfig(
    format="%(asctime)s %(levelname)-8s [%(name)s] %(message)s", level=logging.INFO
)


def collect_categories(*frames: pd.DataFrame) -> pd.DataFrame:
    """Return the unique ``Category`` values of *frames* in first-seen order.

    Args:
        frames: DataFrames that each contain a ``Category`` column.

    Returns:
        A DataFrame with a single ``Category`` column and a fresh 0..n-1 index.
    """
    return (
        pd.concat([frame["Category"] for frame in frames])
        .drop_duplicates()
        .reset_index(drop=True)
        .to_frame()
    )


def find_new_categories(
    valid_df: pd.DataFrame, current_df: pd.DataFrame
) -> pd.DataFrame:
    """Return the categories of *current_df* that are missing from *valid_df*.

    The comparison is a set-membership test, so neither the row order nor
    categories that exist only in *valid_df* affect the result.

    Args:
        valid_df: DataFrame with a ``Category`` column of accepted values.
        current_df: DataFrame with a ``Category`` column of values in use.

    Returns:
        A single-column (``Category``) DataFrame of the unknown values, in the
        order they first appear in *current_df*.
    """
    unknown = ~current_df["Category"].isin(valid_df["Category"])
    return (
        current_df.loc[unknown, ["Category"]]
        .drop_duplicates()
        .reset_index(drop=True)
    )


def main() -> int:
    """Run the category check and return the process exit status."""
    logging.info("Reading the main csv file...")
    df = pd.read_csv("plane-alert-db.csv")

    logging.info("Reading the Ukraine csv file...")
    ukraine_df = pd.read_csv("plane-alert-ukraine.csv")

    category_unique_df = collect_categories(df, ukraine_df)
    logging.info(f"Total Categories in PR Count: ({category_unique_df.shape[0]}).")

    logging.info("Reading the export category csv file...")
    valid_df = pd.read_csv("plane-alert-categories.csv")

    # Categories that are listed but no longer used are not an error: the
    # listing is only a whitelist, so just make the situation visible.
    stale_df = find_new_categories(category_unique_df, valid_df)
    if not stale_df.empty:
        logging.warning(
            "Listed Categories no longer in use ({}):\n{}".format(
                stale_df.shape[0],
                stale_df.to_string(header=False, index=False),
            )
        )

    changed_df = find_new_categories(valid_df, category_unique_df)
    if not changed_df.empty:
        listing = changed_df.to_string(header=False, index=False)
        logging.error("Invalid category used!")
        logging.error(
            "New Categories found ({}):\n{}".format(changed_df.shape[0], listing)
        )
        sys.stdout.write(
            "The files contain invalid or new Categories:\n" + listing + "\n"
        )
        return 1

    logging.info("Categories check good!")
    return 0


if __name__ == "__main__":
    sys.exit(main())
"""Export the categories currently in use to ``plane-alert-categories.csv``."""

import logging
import pandas as pd

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-8s [%(name)s] %(message)s",
)


def main():
    """Read both databases and write their unique categories as a CSV listing."""
    logging.info("Reading the main csv file...")
    main_db = pd.read_csv("plane-alert-db.csv")

    logging.info("Reading the Ukraine csv file...")
    ukraine_db = pd.read_csv("plane-alert-ukraine.csv")

    # Stack both Category columns, then keep the first occurrence of each
    # value so the output preserves first-seen order.
    stacked = pd.concat([main_db["Category"], ukraine_db["Category"]])
    unique_categories = stacked.drop_duplicates().reset_index(drop=True)

    total = unique_categories.shape[0]
    logging.info(f"Total Categories Count: ({total}).")

    unique_categories.to_csv(
        "plane-alert-categories.csv",
        mode="wb",
        index=False,
        header=True,
        encoding="utf8",
        lineterminator="\n",
    )


if __name__ == "__main__":
    main()