Skip to content

Commit

Permalink
fix: add debug logging for scrolling
Browse files Browse the repository at this point in the history
Signed-off-by: Eiko Wagenknecht <[email protected]>
  • Loading branch information
eikowagenknecht committed Sep 26, 2024
1 parent 84156bd commit b081f54
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 15 deletions.
2 changes: 1 addition & 1 deletion src/lootscraper/scraper/epic_games.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def get_offer_handlers(self, page: Page) -> list[OfferHandler]:

async def page_loaded_hook(self, page: Page) -> None:
# Scroll to bottom to make free games section load
await Scraper.scroll_page_to_bottom(page)
await Scraper.scroll_page_to_bottom(self, page)

async def read_raw_offer(self, element: Locator) -> RawOffer:
# Scroll element into view to load img url
Expand Down
2 changes: 1 addition & 1 deletion src/lootscraper/scraper/google_games.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def get_offer_handlers(self, page: Page) -> list[OfferHandler]:
]

async def page_loaded_hook(self, page: Page) -> None:
await Scraper.scroll_page_to_bottom(page)
await Scraper.scroll_page_to_bottom(self, page)

async def read_raw_offer(self, element: Locator) -> RawOffer:
# Scroll into view for images to load
Expand Down
2 changes: 1 addition & 1 deletion src/lootscraper/scraper/itch_games.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def get_offer_handlers(self, page: Page) -> list[OfferHandler]:
]

async def page_loaded_hook(self, page: Page) -> None:
await Scraper.scroll_page_to_bottom(page)
await Scraper.scroll_page_to_bottom(self, page)

async def read_raw_offer(self, element: Locator) -> RawOffer:
# Scroll into view to make sure the image is loaded
Expand Down
60 changes: 48 additions & 12 deletions src/lootscraper/scraper/scraper_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,20 @@ async def page_loaded_hook(self, page: Page) -> None:
(e.g. scroll to bottom of page).
"""

def get_screenshot_filename(self, suffix: str) -> Path:
    """
    Build the path for a screenshot file.

    The file name has the form ``<source>_<timestamp>_<suffix>.png`` where
    ``<source>`` is the lowercased name of ``self.get_source()`` and
    ``<timestamp>`` is the current UTC time in ISO format with every "."
    and ":" replaced by "_". The path is located below
    ``Config.data_path()``.
    """
    base_dir = Config.data_path()
    source_name = self.get_source().name.lower()

    # Replace the separators of the ISO timestamp with underscores
    timestamp = datetime.now(tz=timezone.utc).isoformat()
    for separator in (".", ":"):
        timestamp = timestamp.replace(separator, "_")

    file_name = "_".join([source_name, timestamp, suffix]) + ".png"
    return base_dir / Path(file_name)

async def read_offers(self) -> list[Offer]:
"""
Read all offers from the page.
Expand All @@ -153,16 +167,7 @@ async def read_offers(self) -> list[Offer]:
)
await self.page_loaded_hook(page)
except Error:
filename = Config.data_path() / Path(
"error_"
+ self.get_source().name.lower()
+ "_"
+ datetime.now(tz=timezone.utc)
.isoformat()
.replace(".", "_")
.replace(":", "_")
+ ".png",
)
filename = self.get_screenshot_filename("page_not_ready")
self.logger.exception(
f"The page didn't get ready to be parsed. "
f"Saved screenshot to {filename}.",
Expand Down Expand Up @@ -395,8 +400,7 @@ async def scroll_element_to_bottom(page: Page, element_id: str) -> None:
# One final wait so the content may load
await sleep(SCROLL_PAUSE_SECONDS)

@staticmethod
async def scroll_page_to_bottom(page: Page) -> None:
async def scroll_page_to_bottom(self, page: Page) -> None:
"""
Scroll down to the bottom of the current page.
Expand All @@ -408,6 +412,12 @@ async def scroll_page_to_bottom(page: Page) -> None:
scolled_x_times = 0

while True:
# Take screenshot
filename = self.get_screenshot_filename(
f"debug_scroll_{str(scolled_x_times)}",
)
await page.screenshot(path=str(filename.resolve()))

# Wait to load page. We do this first to give the page time for
# the initial load
await sleep(SCROLL_PAUSE_SECONDS)
Expand All @@ -426,8 +436,34 @@ async def scroll_page_to_bottom(page: Page) -> None:
break

# Wait to load page by scrolling the mouse wheel

# Take screenshot
filename = self.get_screenshot_filename(
"debug_scroll_endloop",
)
await page.screenshot(path=str(filename.resolve()))

await page.mouse.wheel(0, -100)

# Take screenshot
filename = self.get_screenshot_filename(
"debug_scroll_wheel_1",
)
await page.screenshot(path=str(filename.resolve()))

await page.mouse.wheel(0, 100)

# Take screenshot
filename = self.get_screenshot_filename(
"debug_scroll_wheel_2",
)
await page.screenshot(path=str(filename.resolve()))

# One final wait so the content may load
await sleep(SCROLL_PAUSE_SECONDS)

# Take screenshot
filename = self.get_screenshot_filename(
"debug_scroll_final",
)
await page.screenshot(path=str(filename.resolve()))

0 comments on commit b081f54

Please sign in to comment.