Add gog giveaways support (#33)
Signed-off-by: Eiko Wagenknecht <[email protected]>
eikowagenknecht authored Mar 31, 2022
1 parent 5bbbd6b commit ad4d30b
Showing 7 changed files with 208 additions and 6 deletions.
1 change: 1 addition & 0 deletions app/common.py
@@ -22,6 +22,7 @@ class Source(Enum):
    AMAZON = "Amazon Prime"
    EPIC = "Epic Games"
    STEAM = "Steam"
    GOG = "GOG"


@dataclass
4 changes: 2 additions & 2 deletions app/feed.py
@@ -3,12 +3,12 @@
from datetime import datetime
from pathlib import Path

from feedgen.feed import FeedGenerator, FeedEntry
from feedgen.feed import FeedEntry, FeedGenerator

from .common import (
    TIMESTAMP_SHORT,
    TIMESTAMP_LONG,
    TIMESTAMP_READABLE_WITH_HOUR,
    TIMESTAMP_SHORT,
    LootOffer,
    OfferType,
    Source,
2 changes: 1 addition & 1 deletion app/scraper/info/utils.py
@@ -1,5 +1,5 @@
import re
import difflib
import re

RESULT_MATCH_THRESHOLD = 0.85

6 changes: 3 additions & 3 deletions app/scraper/loot/amazon_prime.py
@@ -23,10 +23,10 @@
XPATH_GAMES = (
    '//div[@data-a-target="offer-list-FGWP_FULL"]//div[@data-a-target="Offer"]'
)
SUBPATH_TITLE = './/div[contains(concat(" ", normalize-space(@class), " "), " offer__body__titles")]/h3'
SUBPATH_PARAGRAPH = './/div[contains(concat(" ", normalize-space(@class), " "), " offer__body__titles")]/p'
SUBPATH_TITLE = './/div[contains(concat(" ", normalize-space(@class), " "), " offer__body__titles ")]/h3'
SUBPATH_PARAGRAPH = './/div[contains(concat(" ", normalize-space(@class), " "), " offer__body__titles ")]/p'
SUBPATH_ENDDATE = (
    './/div[contains(concat(" ", normalize-space(@class), " "), " claim-info")]//p/span'
    './/div[contains(concat(" ", normalize-space(@class), " "), " claim-info ")]//p/span'
)
SUBPATH_LINK = './/a[@data-a-target="learn-more-card"]'
SUBPATH_IMG = './/img[@class="tw-image"]'
191 changes: 191 additions & 0 deletions app/scraper/loot/gog.py
@@ -0,0 +1,191 @@
import logging
from dataclasses import dataclass
from datetime import datetime, timezone
from time import sleep

from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from app.common import LootOffer, OfferType, Source
from app.scraper.loot.scraper import Scraper

SCRAPER_NAME = "GOG"
ROOT_URL = "https://www.gog.com/#giveaway"
MAX_WAIT_SECONDS = 60 # Needs to be quite high in Docker for first run

XPATH_PAGE_LOADED = """//div[@class="content cf"]"""
XPATH_GIVEAWAY = """//a[contains(concat(" ", normalize-space(@class), " "), " giveaway-banner ")]""" # URL: Attribute href
XPATH_SWITCH_TO_ENGLISH = """//li[@class="footer-microservice-language__item"][1]"""
SUBPATH_TITLE = """.//span[contains(concat(" ", normalize-space(@class), " "), " giveaway-banner__title ")]"""
SUBPATH_IMAGE = """.//div[contains(concat(" ", normalize-space(@class), " "), " giveaway-banner__image ")]//source[@type="image/png" and not(@media)]"""  # Attribute srcset: first entry, without the " 2x" suffix and with the "https:" prefix added
SUBPATH_VALID_TO = """.//gog-countdown-timer"""  # Attribute "end-date" without the last 3 digits (000) is the timestamp in unix time
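# Illustration with assumed example values (not taken from the live page):
#   end-date="1648742400000" -> drop the trailing "000" -> 1648742400 -> 2022-03-31 16:00:00 UTC
#   srcset="//images.gog.com/<id>.png 2x, ..." -> first entry, " 2x" stripped, "https:" prepended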


@dataclass
class RawOffer:
    title: str | None
    valid_to: str | None
    url: str | None
    img_url: str | None


class GogScraper(Scraper):
    @staticmethod
    def scrape(
        driver: WebDriver, options: dict[str, bool] | None = None
    ) -> dict[str, list[LootOffer]]:
        if options and not options[OfferType.GAME.name]:
            return {}

        driver.get(ROOT_URL)

        offers = {}

        logging.info(f"Analyzing {ROOT_URL} for {OfferType.GAME.value} offers")
        offers[OfferType.GAME.name] = GogScraper.read_offers_from_page(driver)

        return offers

    @staticmethod
    def read_offers_from_page(driver: WebDriver) -> list[LootOffer]:
        try:
            # Wait until the page has loaded
            WebDriverWait(driver, MAX_WAIT_SECONDS).until(
                EC.presence_of_element_located((By.XPATH, XPATH_PAGE_LOADED))
            )
        except WebDriverException:
            logging.error(f"Page took longer than {MAX_WAIT_SECONDS} seconds to load")
            return []

        try:
            # Switch to the English version
            en = driver.find_element(By.XPATH, XPATH_SWITCH_TO_ENGLISH)
            en.click()
            sleep(1)  # Wait for the language switch to begin
        except WebDriverException:
            logging.error("Couldn't switch to English")
            return []

        try:
            # Wait until the giveaway banner has loaded
            WebDriverWait(driver, MAX_WAIT_SECONDS).until(
                EC.presence_of_element_located((By.XPATH, XPATH_GIVEAWAY))
            )
        except WebDriverException:
            logging.info(
                f"Giveaways took longer than {MAX_WAIT_SECONDS} seconds to load, probably there are none"
            )
            return []

        offer_element = driver.find_element(By.XPATH, XPATH_GIVEAWAY)

        raw_offers: list[RawOffer] = []
        raw_offers.append(GogScraper.read_raw_offer(offer_element))

        normalized_offers = GogScraper.normalize_offers(raw_offers)

        return normalized_offers

    @staticmethod
    def read_raw_offer(element: WebElement) -> RawOffer:
        title_str = None
        valid_to_str = None
        url_str = None
        img_url_str = None

        try:
            title_str = str(element.find_element(By.XPATH, SUBPATH_TITLE).text)
            title_str = title_str.removeprefix("Claim ")
            title_str = title_str.removesuffix(
                " and don't miss the best GOG offers in the future!"
            )
        except WebDriverException:
            # Nothing to do here, the value stays None
            pass

        try:
            valid_to_str = str(
                element.find_element(By.XPATH, SUBPATH_VALID_TO).get_attribute(
                    "end-date"
                )
            )
        except WebDriverException:
            # Nothing to do here, the value stays None
            pass

        try:
            url_str = str(element.get_attribute("href"))  # type: ignore
        except WebDriverException:
            # Nothing to do here, the value stays None
            pass

        try:
            img_url_str = str(
                element.find_element(By.XPATH, SUBPATH_IMAGE).get_attribute("srcset")
            )
            img_url_str = "https:" + (
                img_url_str.split(",")[0]
                .strip()
                .removesuffix(" 2x")
                .removesuffix(" 1x")
            )
        except WebDriverException:
            # Nothing to do here, the value stays None
            pass

        # For current offers, the date is included twice, but it only means the end date

        return RawOffer(
            title=title_str,
            valid_to=valid_to_str,
            url=url_str,
            img_url=img_url_str,
        )

    @staticmethod
    def normalize_offers(raw_offers: list[RawOffer]) -> list[LootOffer]:
        normalized_offers: list[LootOffer] = []

        for offer in raw_offers:
            # Raw text
            rawtext = ""
            if offer.title:
                rawtext += f"<title>{offer.title}</title>"

            if offer.valid_to:
                rawtext += f"<enddate>{offer.valid_to}</enddate>"

            # Title (the surrounding text has already been stripped in read_raw_offer)
            title = offer.title

            # Valid to
            valid_to_stamp = None
            if offer.valid_to:
                try:
                    valid_to_unix = int(offer.valid_to) / 1000
                    valid_to_stamp = datetime.utcfromtimestamp(valid_to_unix).replace(
                        tzinfo=timezone.utc
                    )
                except ValueError:
                    valid_to_stamp = None

            nearest_url = offer.url if offer.url else ROOT_URL
            loot_offer = LootOffer(
                seen_last=datetime.now(timezone.utc),
                source=Source.GOG,
                type=OfferType.GAME,
                rawtext=rawtext,
                title=title,
                valid_to=valid_to_stamp,
                url=nearest_url,
                img_url=offer.img_url,
            )

            if title is not None and len(title) > 0:
                normalized_offers.append(loot_offer)

        return normalized_offers
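
For reference, a minimal sketch of driving the new scraper standalone, assuming a local headless Chrome setup; in the project itself the WebDriver and the options dict are provided by lootscraper.py (see the diff below):

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

from app.common import OfferType
from app.scraper.loot.gog import GogScraper

chrome_options = Options()
chrome_options.add_argument("--headless")  # assumed: run Chrome without a window
driver = webdriver.Chrome(options=chrome_options)
try:
    # Request only game offers; the scraper returns an empty dict otherwise
    offers = GogScraper.scrape(driver, {OfferType.GAME.name: True})
    for offer in offers.get(OfferType.GAME.name, []):
        print(offer.title, offer.valid_to, offer.url)
finally:
    driver.quit()
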
1 change: 1 addition & 0 deletions config.default.ini
@@ -15,6 +15,7 @@ ForceUpdate = yes
[sources_loot]
Amazon = yes
Epic = yes
Gog = yes
Steam = yes

[sources_info]
9 changes: 9 additions & 0 deletions lootscraper.py
@@ -19,6 +19,7 @@
from app.scraper.info.steam import get_steam_details
from app.scraper.loot.amazon_prime import AmazonScraper
from app.scraper.loot.epic_games import EpicScraper
from app.scraper.loot.gog import GogScraper
from app.scraper.loot.steam import SteamScraper
from app.upload import upload_to_server

@@ -88,6 +89,7 @@ def job() -> None:
    cfg_amazon: bool = Config.config().getboolean("sources_loot", "Amazon")
    cfg_epic: bool = Config.config().getboolean("sources_loot", "Epic")
    cfg_steam: bool = Config.config().getboolean("sources_loot", "Steam")
    cfg_gog: bool = Config.config().getboolean("sources_loot", "Gog")

    cfg_games: bool = Config.config().getboolean("actions", "ScrapeGames")
    cfg_loot: bool = Config.config().getboolean("actions", "ScrapeLoot")
@@ -117,6 +119,13 @@ def job() -> None:
    else:
        logging.info(f"Skipping {Source.STEAM.value}")

    if cfg_gog:
        scraped_offers[Source.GOG.name] = GogScraper.scrape(
            webdriver, cfg_what_to_scrape
        )
    else:
        logging.info(f"Skipping {Source.GOG.value}")

    # Check which offers are new and which are updated, then act accordingly:
    # - Offers that are neither new nor updated just get a new date
    # - Offers that are new are inserted
