-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add check for category to flag new/invalid submissions.
- Loading branch information
Showing
5 changed files
with
157 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Workflow that validates the "Category" values used by the database CSV
# files whenever a pull request touches one of them.
name: Check categories

on:
  pull_request:
    # Only run when one of these data files is part of the pull request.
    paths:
      - "plane-alert-db.csv"
      - "plane-alert-pia.csv"
      - "plane-alert-ukraine.csv"
      - "plane_images.csv"

jobs:
  checkCategories:
    runs-on: ubuntu-latest
    name: Check if main databases have valid Categories
    steps:
      # NOTE(review): the pinned action versions were mangled into
      # "[email protected]" when this page was extracted; major-version tags
      # are used below - confirm against the original commit.
      - name: Checkout
        uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: "3.10"
          cache: "pip"
      - run: pip install -r ./scripts/requirements.txt

      # The script exits non-zero when a category is not in the valid list,
      # which fails this job.
      - name: Run main database checks
        run: python ./scripts/check_categories.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
Category | ||
Other Air Forces | ||
Toy Soldiers | ||
Flying Doctors | ||
Dictator Alert | ||
Governments | ||
Ptolemy would be proud | ||
Police Forces | ||
Dogs with Jobs | ||
Quango | ||
Jump Johnny Jump | ||
Aerial Firefighter | ||
Climate Crisis | ||
Jesus he Knows me | ||
Big Hello | ||
Other Navies | ||
United States Navy | ||
Coastguard | ||
Bizjets | ||
Da Comrade | ||
Gunship | ||
As Seen on TV | ||
Oligarch | ||
Oxcart | ||
Distinctive | ||
Joe Cool | ||
Don't you know who I am? | ||
Historic | ||
You came here in that thing? | ||
Hired Gun | ||
Perfectly Serviceable Aircraft | ||
Gas Bags | ||
Vanity Plate | ||
Watch Me Fly | ||
Zoomies | ||
GAF | ||
Special Forces | ||
Aerobatic Teams | ||
UAV | ||
UK National Police Air Service | ||
RAF | ||
Royal Navy Fleet Air Arm | ||
Royal Aircraft | ||
Army Air Corps | ||
Football | ||
Radiohead | ||
Nuclear | ||
USAF | ||
PIA | ||
CAP | ||
United States Marine Corps | ||
Ukraine |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
""""script to flag new/invalid categories""" | ||
|
||
import logging | ||
import pandas as pd | ||
import sys | ||
|
||
logging.basicConfig(
    format="%(asctime)s %(levelname)-8s [%(name)s] %(message)s", level=logging.INFO
)


def find_new_categories(valid, found) -> list:
    """Return the categories in *found* that are absent from *valid*.

    The comparison is a membership test, so neither the order of the two
    inputs nor entries of *valid* that are currently unused affect the
    result.

    Args:
        valid: Iterable (list, Series, ...) of accepted category names.
        found: Iterable of category names seen in the checked files;
            duplicates are allowed.

    Returns:
        The unknown categories, de-duplicated, in first-seen order.
    """
    seen = pd.Series(found, dtype="object").drop_duplicates()
    known = seen.isin(pd.Series(valid, dtype="object"))
    return seen[~known].tolist()


def main() -> int:
    """Check the categories used by the database files against the valid list.

    Reads ``plane-alert-db.csv`` and ``plane-alert-ukraine.csv``, collects
    their ``Category`` values and compares them with the ``Category`` column
    of ``plane-alert-categories.csv``.

    Returns:
        ``0`` when every used category is listed as valid, ``1`` otherwise.
    """
    logging.info("Reading the main csv file...")
    df = pd.read_csv("plane-alert-db.csv")

    logging.info("Reading the Ukraine csv file...")
    ukraine_df = pd.read_csv("plane-alert-ukraine.csv")

    used_categories = pd.concat(
        [df["Category"], ukraine_df["Category"]], ignore_index=True
    ).drop_duplicates()
    logging.info("Total Categories in PR Count: (%d).", len(used_categories))

    logging.info("Reading the export category csv file...")
    valid_df = pd.read_csv("plane-alert-categories.csv")

    # Compare by membership rather than DataFrame.equals(): equals() also
    # fails when the first-appearance order changes or when a valid category
    # is simply no longer used, neither of which is an invalid submission.
    new_categories = find_new_categories(valid_df["Category"], used_categories)
    if new_categories:
        logging.info("Invalid category used!")
        logging.info(
            "New Categories found (%d):\n%s",
            len(new_categories),
            "\n".join(str(category) for category in new_categories),
        )
        sys.stdout.write("The files contain invalid or new Categories:\n")
        return 1

    logging.info("Categories check good!")
    return 0


if __name__ == "__main__":
    sys.exit(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
""""script to export current categories""" | ||
|
||
import logging | ||
import pandas as pd | ||
|
||
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-8s [%(name)s] %(message)s",
)

if __name__ == "__main__":
    # Each input file is announced in the log right before it is read.
    sources = (
        ("Reading the main csv file...", "plane-alert-db.csv"),
        ("Reading the Ukraine csv file...", "plane-alert-ukraine.csv"),
    )
    frames = []
    for announcement, csv_path in sources:
        logging.info(announcement)
        frames.append(pd.read_csv(csv_path))

    # Stack the "Category" columns of both files and keep the first
    # occurrence of every value, renumbering the result from zero.
    stacked = pd.concat([frame["Category"] for frame in frames])
    unique_categories = stacked.drop_duplicates().reset_index(drop=True)
    logging.info(f"Total Categories Count: ({len(unique_categories)}).")

    # Write the list as a one-column CSV with a header row and "\n" line
    # endings.
    export_options = {
        "mode": "wb",
        "index": False,
        "header": True,
        "encoding": "utf8",
        "lineterminator": "\n",
    }
    unique_categories.to_csv("plane-alert-categories.csv", **export_options)