From 2ad2386a664e4734509d642662343dd5196ed79b Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 21 Jan 2024 20:30:01 -0500 Subject: [PATCH] Add job for continuing at last offset page. --- yellowstone/core.py | 7 +------ yellowstone/job/__init__.py | 10 ++++++++++ yellowstone/job/index_site_members.py | 2 +- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/yellowstone/core.py b/yellowstone/core.py index b6c5054..2673d19 100644 --- a/yellowstone/core.py +++ b/yellowstone/core.py @@ -68,12 +68,7 @@ def queue_all_sites(self) -> None: logger.info("Queueing site start jobs for '%s'", site_slug) # XXX add_index_site_pages_job(site_slug) # XXX add_index_site_forums_job(site_slug) - self.job.index_site_members( - { - "site_slug": site_slug, - "offset": START_MEMBER_OFFSET, - }, - ) + self.job.index_site_members_continue(site_slug) def process_all_jobs(self) -> None: logger.info("Processing all jobs in queue") diff --git a/yellowstone/job/__init__.py b/yellowstone/job/__init__.py index ccb991c..9da7905 100644 --- a/yellowstone/job/__init__.py +++ b/yellowstone/job/__init__.py @@ -64,6 +64,16 @@ def index_site_forums(self, data: None) -> None: def index_site_members(self, data: SiteMemberJob) -> None: self.add_raw(JobType.INDEX_SITE_MEMBERS, cast(Json, data)) + def index_site_members_continue(self, site_slug: str) -> None: + # Reads the last member page, and continues from there + offset = self.database.get_last_number_offset(site_slug=site_slug) + self.index_site_members( + { + "site_slug": site_slug, + "offset": offset, + }, + ) + def fetch_user(self, data: GetUserJob) -> None: self.add_raw(JobType.FETCH_USER, cast(Json, data)) diff --git a/yellowstone/job/index_site_members.py b/yellowstone/job/index_site_members.py index c76eb37..48af85d 100644 --- a/yellowstone/job/index_site_members.py +++ b/yellowstone/job/index_site_members.py @@ -25,7 +25,7 @@ def run(core: "BackupDispatcher", data: SiteMemberJob) -> None: site_slug = data["site_slug"] offset = data["offset"] - assert offset >= START_OFFSET, "Offset cannot be zero or negative" + assert offset >= 1, "Offset cannot be zero or negative" site_id = core.site_id_cache[site_slug] logger.info( "Retrieving page %d of site members from '%s' (%d)",