From 6aaa042a399fc2ab7266442310d9a381e764a242 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 21 Jan 2024 20:16:15 -0500 Subject: [PATCH] Save last offset for member pages. --- .../20240112_01_Njk7j-create-initial-tables.sql | 5 +++++ queries/progress.sql | 14 ++++++++++++++ yellowstone/core.py | 1 - yellowstone/job/get_site.py | 1 + yellowstone/job/index_site_members.py | 6 ++++-- 5 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 queries/progress.sql diff --git a/migrations/20240112_01_Njk7j-create-initial-tables.sql b/migrations/20240112_01_Njk7j-create-initial-tables.sql index c7b5625..e2edc6e 100644 --- a/migrations/20240112_01_Njk7j-create-initial-tables.sql +++ b/migrations/20240112_01_Njk7j-create-initial-tables.sql @@ -12,6 +12,11 @@ CREATE TABLE site ( language TEXT NOT NULL ); +CREATE TABLE site_progress ( + site_slug TEXT PRIMARY KEY REFERENCES site(site_slug), + last_member_offset INTEGER NOT NULL DEFAULT 1 +); + CREATE TABLE "user" ( user_slug TEXT PRIMARY KEY, user_name TEXT NOT NULL, diff --git a/queries/progress.sql b/queries/progress.sql new file mode 100644 index 0000000..8baadf0 --- /dev/null +++ b/queries/progress.sql @@ -0,0 +1,14 @@ +-- :name add_site_progress :insert +INSERT INTO site_progress (site_slug) + VALUES (:site_slug) + ON CONFLICT (site_slug) + DO NOTHING; + +-- :name get_last_member_offset :one +SELECT last_member_offset FROM site_progress + WHERE site_slug = :site_slug; + +-- :name update_last_member_offset :affected +UPDATE site_progress + SET last_member_offset = MAX(last_member_offset, :last_offset) + WHERE site_slug = :site_slug; diff --git a/yellowstone/core.py b/yellowstone/core.py index 7b57c65..b6c5054 100644 --- a/yellowstone/core.py +++ b/yellowstone/core.py @@ -16,7 +16,6 @@ JobManager, get_site, ) -from .job.index_site_members import START_OFFSET as START_MEMBER_OFFSET from .s3 import S3 from .wikidot import Wikidot diff --git a/yellowstone/job/get_site.py b/yellowstone/job/get_site.py 
index 3176458..81022f4 100644 --- a/yellowstone/job/get_site.py +++ b/yellowstone/job/get_site.py @@ -52,6 +52,7 @@ def insert_site( language=site.language, home_slug=site.home_page_slug, ) + database.add_site_progress(site_slug=site.slug) # TODO insert basic home data _ = site.home_page_id diff --git a/yellowstone/job/index_site_members.py b/yellowstone/job/index_site_members.py index 59abe09..c76eb37 100644 --- a/yellowstone/job/index_site_members.py +++ b/yellowstone/job/index_site_members.py @@ -13,8 +13,6 @@ if TYPE_CHECKING: from ..core import BackupDispatcher -START_OFFSET = 1 - logger = logging.getLogger(__name__) @@ -44,6 +42,7 @@ def run(core: "BackupDispatcher", data: SiteMemberJob) -> None: use_admin=core.config.uses_admin_members(site_slug), ) + # Save member data if members: with core.database.transaction(): # Queue the next offset, for iterating over pages using the job queue @@ -57,3 +56,6 @@ def run(core: "BackupDispatcher", data: SiteMemberJob) -> None: joined_at=member.joined_at, ) core.job.fetch_user({"user_id": member.id}) + + # Save member page progress for this site + core.database.update_last_member_offset(site_slug=site_slug, last_offset=offset)