Skip to content

Commit

Permalink
Add check for category to flag new/invalid submissions.
Browse files Browse the repository at this point in the history
  • Loading branch information
Phaeton committed Aug 13, 2024
1 parent 4958e4e commit 9d08d13
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 0 deletions.
25 changes: 25 additions & 0 deletions .github/workflows/check_categories.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Runs scripts/check_categories.py on pull requests that touch one of the
# CSV files listed under `paths`.
name: Check categories

on:
  pull_request:
    paths:
      - "plane-alert-db.csv"
      - "plane-alert-pia.csv"
      - "plane-alert-ukraine.csv"
      - "plane_images.csv"

jobs:
  checkCategories:
    runs-on: ubuntu-latest
    name: Check if main databases have valid Categories
    steps:
      - name: Checkout
        # NOTE(review): the exact pinned tag was lost (the reference was
        # mangled into an obfuscated e-mail placeholder); the major-version
        # tag is used here -- confirm against the repository.
        uses: actions/checkout@v4
      # NOTE(review): same as above -- exact pinned tag to be confirmed.
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          cache: "pip"
      - run: pip install -r ./scripts/requirements.txt

      - name: Run main database checks
        run: python ./scripts/check_categories.py
3 changes: 3 additions & 0 deletions .github/workflows/create_db_derivatives.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ jobs:
- name: Create category and images derivative CSV files
run: python ./scripts/create_db_derivatives.py

- name: Create valid category listing
run: python ./scripts/export_categories.py

- name: Update README.md to include changes
run: python ./scripts/update_readme.py

Expand Down
52 changes: 52 additions & 0 deletions plane-alert-categories.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
Category
Other Air Forces
Toy Soldiers
Flying Doctors
Dictator Alert
Governments
Ptolemy would be proud
Police Forces
Dogs with Jobs
Quango
Jump Johnny Jump
Aerial Firefighter
Climate Crisis
Jesus he Knows me
Big Hello
Other Navies
United States Navy
Coastguard
Bizjets
Da Comrade
Gunship
As Seen on TV
Oligarch
Oxcart
Distinctive
Joe Cool
Don't you know who I am?
Historic
You came here in that thing?
Hired Gun
Perfectly Serviceable Aircraft
Gas Bags
Vanity Plate
Watch Me Fly
Zoomies
GAF
Special Forces
Aerobatic Teams
UAV
UK National Police Air Service
RAF
Royal Navy Fleet Air Arm
Royal Aircraft
Army Air Corps
Football
Radiohead
Nuclear
USAF
PIA
CAP
United States Marine Corps
Ukraine
46 changes: 46 additions & 0 deletions scripts/check_categories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
""""script to flag new/invalid categories"""

import logging
import sys

import pandas as pd

logging.basicConfig(
    format="%(asctime)s %(levelname)-8s [%(name)s] %(message)s", level=logging.INFO
)


def collect_categories(*frames):
    """Return the unique ``Category`` values found across *frames*.

    Args:
        *frames: DataFrames that each contain a ``Category`` column.

    Returns:
        A Series of the distinct category values, in first-seen order, with
        a fresh 0..n-1 index.
    """
    return (
        pd.concat([frame["Category"] for frame in frames])
        .drop_duplicates()
        .reset_index(drop=True)
    )


def find_new_categories(used, valid):
    """Return the entries of *used* that do not appear in *valid*.

    The comparison is by membership only, so neither the order of the
    categories nor valid categories that are currently unused can make the
    check fail.  A missing (NaN) category is reported as invalid unless
    *valid* itself contains a missing value.

    Args:
        used: Series of the categories present in the checked files.
        valid: Series (or any list-like) of the accepted categories.

    Returns:
        A Series holding the categories that are not accepted, in the order
        they occur in *used*.
    """
    return used[~used.isin(valid)].reset_index(drop=True)


if __name__ == "__main__":
    logging.info("Reading the main csv file...")
    df = pd.read_csv("plane-alert-db.csv")

    logging.info("Reading the Ukraine csv file...")
    ukraine_df = pd.read_csv("plane-alert-ukraine.csv")

    category_unique = collect_categories(df, ukraine_df)
    logging.info(f"Total Categories in PR Count: ({category_unique.shape[0]}).")

    logging.info("Reading the export category csv file...")
    valid_df = pd.read_csv("plane-alert-categories.csv")

    new_categories = find_new_categories(category_unique, valid_df["Category"])
    if not new_categories.empty:
        # One category per line, without the alignment padding to_string() adds.
        listing = "\n".join(map(str, new_categories))

        logging.error("Invalid category used!")
        logging.error(
            "New Categories found (%d):\n%s", new_categories.shape[0], listing
        )

        # Plain stdout copy so the offending values are visible in the job
        # output regardless of the logging configuration.
        sys.stdout.write(
            f"The files contain invalid or new Categories:\n{listing}\n"
        )
        sys.exit(1)
    logging.info("Categories check good!")
31 changes: 31 additions & 0 deletions scripts/export_categories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
""""script to export current categories"""

import logging

import pandas as pd

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-8s [%(name)s] %(message)s",
)

if __name__ == "__main__":
    # Load both source databases; only their "Category" columns are used.
    logging.info("Reading the main csv file...")
    main_db = pd.read_csv("plane-alert-db.csv")

    logging.info("Reading the Ukraine csv file...")
    ukraine_db = pd.read_csv("plane-alert-ukraine.csv")

    # Stack the two columns, keep the first occurrence of every value and
    # renumber the result from zero.
    stacked = pd.concat([main_db["Category"], ukraine_db["Category"]])
    categories = stacked.drop_duplicates().reset_index(drop=True)
    logging.info(f"Total Categories Count: ({categories.shape[0]}).")

    # Write the listing as a single-column CSV with a header row and
    # "\n" line endings.
    csv_options = {
        "mode": "wb",
        "index": False,
        "header": True,
        "encoding": "utf8",
        "lineterminator": "\n",
    }
    categories.to_csv("plane-alert-categories.csv", **csv_options)

0 comments on commit 9d08d13

Please sign in to comment.