Skip to content

Commit

Permalink
ci: fix check databases script
Browse files Browse the repository at this point in the history
  • Loading branch information
rickstaa committed Mar 9, 2023
1 parent 861385f commit d898f39
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 32 deletions.
4 changes: 0 additions & 4 deletions plane_images.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12881,10 +12881,6 @@ A0FEBB,https://cdn.jetphotos.com/full/6/53634_1620789974.jpg,,,
50844D,https://cdn.jetphotos.com/full/6/24026_1620718500.jpg,https://cdn.jetphotos.com/full/6/58084_1628779538.jpg,https://cdn.jetphotos.com/full/6/38737_1622457839.jpg,
508056,https://cdn.jetphotos.com/full/6/47939_1634755862.jpg,https://cdn.jetphotos.com/full/5/24309_1616151886.jpg,https://cdn.jetphotos.com/full/6/29163_1613478304.jpg,
A12F83,https://www.helis.com/h2/s-76c_n176am.jpg,https://www.helis.com/h3/aero_med_spectrum_health.jpg,https://cdn.airplane-pictures.net/images/uploaded-images/2021/6/12/1399587.jpg,https://photos-e1.flightcdn.com/photos/retriever/f11abd85ec9fa33a4a919c6df156c3b7c21090f1
030012,https://cdn.jetphotos.com/full/5/48040_1657365950.jpg,https://cdn.jetphotos.com/full/6/22375_1507749659.jpg,https://cdn.jetphotos.com/full/6/14219_1506882327.jpg
AE61E0,https://cdn.jetphotos.com/full/5/23289_1585583495.jpg,https://cdn.jetphotos.com/full/5/85716_1569092435.jpg,https://cdn.jetphotos.com/full/6/28527_1654828080.jpg
A4B960,https://cdn.jetphotos.com/full/6/68229_1638839448.jpg,https://cdn.jetphotos.com/full/6/61781_1638636537.jpg,https://cdn.jetphotos.com/full/6/64341_1536447984.jpg
A0AA61,https://cdn.jetphotos.com/full/5/48050_1643157142.jpg,https://cdn.jetphotos.com/full/5/46281_1663272526.jpg,https://cdn.jetphotos.com/full/5/57477_1659559162.jpg
030012,https://cdn.jetphotos.com/full/5/48040_1657365950.jpg,https://cdn.jetphotos.com/full/6/22375_1507749659.jpg,https://cdn.jetphotos.com/full/6/14219_1506882327.jpg,
AE61E0,https://cdn.jetphotos.com/full/5/23289_1585583495.jpg,https://cdn.jetphotos.com/full/5/85716_1569092435.jpg,https://cdn.jetphotos.com/full/6/28527_1654828080.jpg,
A4B960,https://cdn.jetphotos.com/full/6/68229_1638839448.jpg,https://cdn.jetphotos.com/full/6/61781_1638636537.jpg,https://cdn.jetphotos.com/full/6/64341_1536447984.jpg,
Expand Down
77 changes: 49 additions & 28 deletions scripts/check_main_databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
format="%(asctime)s %(levelname)-8s [%(name)s] %(message)s", level=logging.INFO
)

MAIN_DATABASE_NAME = "plane-alert-db.csv"
TWITTER_BLOCKED_DATABASE_NAME = "plane-alert-twitter-blocked.csv"
UKRAINE_DATABASE_NAME = "plane-alert-ukraine.csv"
PLANE_IMAGES_DATABASE_NAME = "plane_images.txt"


def is_valid_url(url, allow_nans=False):
"""Check if a URL starts with http or https.
Expand Down Expand Up @@ -45,64 +50,67 @@ def is_hex(string):


def contains_duplicate_ICAOs(df):
    """Check if the database has any duplicate ICAO codes.

    Every row whose '$ICAO' value occurs more than once is listed on stdout, an
    error is logged and the process is terminated with exit status 1. Nothing
    happens when all '$ICAO' values are unique.

    Args:
        df (pandas.Dataframe): The database to check. When the caller has set a
            ``name`` attribute on the frame it is used to label the messages,
            otherwise the generic label "database" is used.

    Raises:
        SystemExit: When the database has duplicate ICAO codes.
    """
    # keep=False marks every member of a duplicate group, not only the repeats.
    duplicate_icao = df[df.duplicated(subset="$ICAO", keep=False)]["$ICAO"]
    if len(duplicate_icao) > 0:
        db_name = getattr(df, "name", "database")
        logging.error(f"The '{db_name}' database has duplicate ICAO codes.")
        sys.stdout.write(
            f"The '{db_name}' database has '{duplicate_icao.shape[0]}' duplicate "
            f"ICAO codes:\n{duplicate_icao.to_string(index=False)}\n"
        )
        sys.exit(1)


def contains_duplicate_regs(df):
    """Check if the database has any duplicate registration numbers.

    Every row whose '$Registration' value occurs more than once is listed on
    stdout together with its '$ICAO' code, an error is logged and the process is
    terminated with exit status 1.

    Args:
        df (pandas.Dataframe): The database to check. When the caller has set a
            ``name`` attribute on the frame it is used to label the messages,
            otherwise the generic label "database" is used.

    Raises:
        SystemExit: When the database has duplicate registration numbers.
    """
    # keep=False marks every member of a duplicate group, not only the repeats.
    duplicate_regs = df[df.duplicated(subset="$Registration", keep=False)][
        ["$ICAO", "$Registration"]
    ]
    if len(duplicate_regs) > 0:
        db_name = getattr(df, "name", "database")
        logging.error(f"The '{db_name}' database has duplicate registration numbers.")
        sys.stdout.write(
            f"The '{db_name}' database has '{duplicate_regs.shape[0]}' duplicate "
            f"registration numbers:\n{duplicate_regs.to_string(index=False)}\n"
        )
        sys.exit(1)


def contains_bad_links(df, allow_nans=False):
    """Check if the database has any links that don't start with http or https.

    Every row whose '$#Link' value is rejected by ``is_valid_url`` is listed on
    stdout together with its '$ICAO' code, an error is logged and the process is
    terminated with exit status 1.

    Args:
        df (pandas.Dataframe): The database to check. When the caller has set a
            ``name`` attribute on the frame it is used to label the messages,
            otherwise the generic label "database" is used.
        allow_nans (bool, optional): If True, NaN values will be considered valid.
            Defaults to False.

    Raises:
        SystemExit: When the database has invalid links.
    """
    # allow_nans must be forwarded by keyword: extra positional arguments of
    # Series.apply are consumed by apply itself and never reach is_valid_url.
    valid_mask = df["$#Link"].apply(is_valid_url, allow_nans=allow_nans).astype(bool)
    # Blank out NaN links so the report shows an empty cell instead of 'NaN'.
    bad_links = df[~valid_mask][["$ICAO", "$#Link"]].fillna("")
    if len(bad_links) > 0:
        db_name = getattr(df, "name", "database")
        logging.error(f"The '{db_name}' database has invalid links.")
        sys.stdout.write(
            f"The '{db_name}' database has '{bad_links.shape[0]}' invalid links:\n"
            f"{bad_links.to_string(index=False)}\n"
        )
        sys.exit(1)
Expand All @@ -119,14 +127,17 @@ def contains_valid_ICAO_hexes(df):
"""
invalid_hexes = df[~df["$ICAO"].apply(is_hex).astype(bool)]["$ICAO"]
if len(invalid_hexes) > 0:
db_name = df.name if hasattr(df, "name") else "database"
error_strings = (
["value", "is", "a hexidecimal"]
if invalid_hexes.shape[0] == 1
else ["values", "are", "hexidecimals"]
)
logging.error("The main database contains non-hexidecimal '$ICAO' values.")
logging.error(
f"The '{db_name}' database contains non-hexidecimal '$ICAO' values."
)
sys.stdout.write(
f"The main database has '{invalid_hexes.shape[0]}' '$ICAO' "
f"The {db_name} database has '{invalid_hexes.shape[0]}' '$ICAO' "
f"{error_strings[0]} that {error_strings[1]} not {error_strings[2]}:\n"
f"{invalid_hexes.to_string(index=False)}\n"
)
Expand All @@ -139,10 +150,13 @@ def contains_valid_ICAO_hexes(df):
##########################################
logging.info("Checking the main database...")
try:
main_df = pd.read_csv("plane-alert-db.csv")
main_df = pd.read_csv(MAIN_DATABASE_NAME)
main_df.name = MAIN_DATABASE_NAME
except Exception as e:
logging.error("The 'plane-alert-db.csv' database is not a valid CSV.")
sys.stdout.write(f"The 'plane-alert-db.csv' database is not a valid CSV: {e}\n")
logging.error(f"The '{MAIN_DATABASE_NAME}' database is not a valid CSV.")
sys.stdout.write(
f"The '{MAIN_DATABASE_NAME}' database is not a valid CSV: {e}\n"
)
sys.exit(1)

# Perform database checks.
Expand All @@ -161,13 +175,14 @@ def contains_valid_ICAO_hexes(df):
##########################################
logging.info("Checking the 'plane-alert-twitter-blocked' database...")
try:
    twitter_blocked_df = pd.read_csv(TWITTER_BLOCKED_DATABASE_NAME)
    # Label the frame that was just loaded (not main_df) so the check helpers
    # report problems against the twitter-blocked file.
    twitter_blocked_df.name = TWITTER_BLOCKED_DATABASE_NAME
except Exception as e:
    # Any failure to read or parse the file aborts the run with exit status 1.
    logging.error(
        f"The '{TWITTER_BLOCKED_DATABASE_NAME}' database is not a valid CSV."
    )
    sys.stdout.write(
        f"The '{TWITTER_BLOCKED_DATABASE_NAME}' database is not a valid CSV: {e}\n"
    )
    sys.exit(1)

Expand All @@ -183,11 +198,12 @@ def contains_valid_ICAO_hexes(df):
##########################################
logging.info("Checking the 'plane-alert-ukraine' database...")
try:
ukraine_df = pd.read_csv("plane-alert-ukraine.csv")
ukraine_df = pd.read_csv(UKRAINE_DATABASE_NAME)
ukraine_df.name = UKRAINE_DATABASE_NAME
except Exception as e:
logging.error("The 'plane-alert-ukraine.csv' database is not a valid CSV.")
logging.error(f"The '{UKRAINE_DATABASE_NAME}' database is not a valid CSV.")
sys.stdout.write(
f"The 'plane-alert-ukraine.csv' database is not a valid CSV: {e}\n"
f"The '{UKRAINE_DATABASE_NAME}' database is not a valid CSV: {e}\n"
)
sys.exit(1)

Expand All @@ -203,10 +219,15 @@ def contains_valid_ICAO_hexes(df):
##########################################
logging.info("Checking the 'plane_images.txt' database...")
try:
images_df = pd.read_csv("plane_images.txt")
images_df = pd.read_csv(PLANE_IMAGES_DATABASE_NAME)
images_df.name = PLANE_IMAGES_DATABASE_NAME
except Exception as e:
logging.error("The 'plane_images.txt' database is not a valid CSV.")
sys.stdout.write(f"The 'plane_images.txt' database is not a valid CSV: {e}\n")
logging.error(
f"The '{PLANE_IMAGES_DATABASE_NAME}' database is not a valid CSV."
)
sys.stdout.write(
f"The '{PLANE_IMAGES_DATABASE_NAME}' database is not a valid CSV: {e}\n"
)
sys.exit(1)

# Perform database checks.
Expand Down

0 comments on commit d898f39

Please sign in to comment.