From d17e1d2736d445110cf861133a87a09361654366 Mon Sep 17 00:00:00 2001 From: king-millez Date: Fri, 26 Apr 2024 23:55:52 +1000 Subject: [PATCH 1/2] Add time-based snap filtering --- pyproject.toml | 2 +- setup.py | 2 +- snapmap_archiver/SnapmapArchiver.py | 27 +++++++++++++++++++++------ snapmap_archiver/__init__.py | 9 +++++++++ snapmap_archiver/time.py | 15 +++++++++++++++ 5 files changed, 47 insertions(+), 8 deletions(-) create mode 100644 snapmap_archiver/time.py diff --git a/pyproject.toml b/pyproject.toml index 8573edf..06cd443 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "snapmap-archiver" -version = "2.1.1" +version = "2.1.2" description = "Download all Snap Map content from a specific location." readme = "README.md" authors = ["Miles Greenwark "] diff --git a/setup.py b/setup.py index 8c50787..0903388 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup -version = "2.1.1" +version = "2.1.2" with open("README.md", "r") as f: long_descr = f.read() diff --git a/snapmap_archiver/SnapmapArchiver.py b/snapmap_archiver/SnapmapArchiver.py index f085413..215a065 100644 --- a/snapmap_archiver/SnapmapArchiver.py +++ b/snapmap_archiver/SnapmapArchiver.py @@ -2,14 +2,15 @@ import os import re import sys +import typing as t from datetime import datetime from time import sleep -from typing import Any, Iterable import requests from snapmap_archiver.coordinates import Coordinates from snapmap_archiver.snap import Snap, SnapJSONEncoder +from snapmap_archiver.time import since_epoch DEFAULT_RADIUS = 10_000 MAX_RADIUS = 85_000 @@ -24,7 +25,8 @@ def __init__( self, *args: str, output_dir: str, - input_file: str | None = None, + input_file: t.Optional[str] = None, + since_time: t.Optional[str] = None, locations: list[str] = [], radius: int = DEFAULT_RADIUS, write_json: bool = False, @@ -35,6 +37,11 @@ def __init__( "Python 3.10 or above is required to use snapmap-archiver!" ) + self.since_time = None + if since_time: + self.since_time = since_epoch(since_time) + print(f"Skipping Snaps older than [{self.since_time}].") + self.input_file = input_file self.arg_snaps = args @@ -54,7 +61,7 @@ def __init__( self.radius = MAX_RADIUS if radius > MAX_RADIUS else radius self.coords_list = [Coordinates(latlon) for latlon in locations] - def download_snaps(self, group: Iterable[Snap]): + def download_snaps(self, group: t.Iterable[Snap]): for snap in group: fpath = os.path.join(self.output_dir, f"{snap.snap_id}.{snap.file_type}") @@ -67,7 +74,7 @@ def download_snaps(self, group: Iterable[Snap]): print(f" - Downloaded [{fpath}].") - def query_snaps(self, snaps: Iterable[str]) -> list[Snap]: + def query_snaps(self, snaps: t.Iterable[str]) -> list[Snap]: to_query: list[str] = [] for snap_id in snaps: rgx_match = re.search( @@ -196,7 +203,7 @@ def main(self): def _parse_snap( self, snap: dict[ - str, Any + str, t.Any ], # I don't like the Any type but this dict is so dynamic there isn't much point hinting it accurately. ) -> Snap | None: if self.all_snaps.get(snap["id"]): @@ -215,8 +222,16 @@ def _parse_snap( print(f'Media URL for snap [{snap["id"]}] could not be determined.') return None + create_time = round(int(snap["timestamp"]) * 10**-3, 3) + + if (self.since_time) and (create_time < self.since_time): + print( + f" - [{snap['id']}] is older than the specified time of [{self.since_time}]. Snap timestamp: [{int(create_time)}]. Skipping." + ) + return None + s = Snap( - create_time=round(int(snap["timestamp"]) * 10**-3, 3), # type: ignore + create_time=create_time, # type: ignore snap_id=snap["id"], url=url, file_type=file_type, diff --git a/snapmap_archiver/__init__.py b/snapmap_archiver/__init__.py index f5195c6..679e285 100644 --- a/snapmap_archiver/__init__.py +++ b/snapmap_archiver/__init__.py @@ -51,6 +51,14 @@ def main(): type=str, help="File containing line-separated Snap URLs or IDs", ) + parser.add_argument( + "-t", + "--since-time", + dest="since_time", + type=str, + help="Remove any Snaps older than the passed time. Either a 10 digit UTC Unix timestamp or [n = number of][m = minutes | h = hours | d = days] (e.g., 1d, 15h, 30m).", + default=None, + ) args, unknown = parser.parse_known_args() sm_archiver = SnapmapArchiver( @@ -61,5 +69,6 @@ def main(): zoom_depth=args.zoom_depth, write_json=args.write_json, input_file=args.input_file, + since_time=args.since_time, ) sm_archiver.main() diff --git a/snapmap_archiver/time.py b/snapmap_archiver/time.py new file mode 100644 index 0000000..2d4cf2b --- /dev/null +++ b/snapmap_archiver/time.py @@ -0,0 +1,15 @@ +from datetime import datetime + + +def since_epoch(since_time: str) -> int: + if since_time.isdigit(): + return int(since_time) + + num = int(since_time[:-1]) + if since_time[-1] == "m": + return int(datetime.now().timestamp()) - num * 60 + if since_time[-1] == "h": + return int(datetime.now().timestamp()) - num * 60 * 60 + if since_time[-1] == "d": + return int(datetime.now().timestamp()) - num * 60 * 60 * 24 + raise ValueError(f"Invalid time filter: [{since_time}]") From a36f9f33fcfcbcf2928f1f07c6bcb739495de651 Mon Sep 17 00:00:00 2001 From: king-millez Date: Fri, 26 Apr 2024 23:58:14 +1000 Subject: [PATCH 2/2] Allow any time filter case --- snapmap_archiver/SnapmapArchiver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snapmap_archiver/SnapmapArchiver.py b/snapmap_archiver/SnapmapArchiver.py index 215a065..dee872e 100644 --- a/snapmap_archiver/SnapmapArchiver.py +++ b/snapmap_archiver/SnapmapArchiver.py @@ -39,7 +39,7 @@ def __init__( self.since_time = None if since_time: - self.since_time = since_epoch(since_time) + self.since_time = since_epoch(since_time.lower()) print(f"Skipping Snaps older than [{self.since_time}].") self.input_file = input_file