Skip to content

Commit

Permalink
support new lidl sublinks scraping
Browse files: browse the repository at this point in the history
  • Loading branch information
StefanBratanov committed Oct 6, 2023
1 parent 81b6eb0 commit 1e8d298
Showing 1 changed file with 2 additions and 10 deletions.
Original file line number Diff line number Diff line change
@@ -18,19 +18,11 @@ class LidlSublinksScraper(
         log.info("Scraping {} for sublinks", baseUrl)
 
         return getHtmlDocument(baseUrl)
-            .select("li[data-Creative=Main Navigation] > a")
+            .select("li.AHeroStageItems__Item > a")
             .map {
                 val href = it.attr("href")
-                val sublink = baseUrl.toURI().resolve(href).toURL()
-                getHtmlDocument(sublink)
-            }
-            .flatMap {
-                it
-                    .select("li.nuc-m-header-sub-nav-item > a")
-                    .map { elem -> elem.attr("href") }
-                    .filter { href -> !href.contains("lidl-plus") }
+                baseUrl.toURI().resolve(href).toURL()
             }
             .distinct()
-            .map { baseUrl.toURI().resolve(it).toURL() }
     }
 }

0 comments on commit 1e8d298

Please sign in to comment.