From 5a90e9c18d35a4a52350654d60a57c7e3306bbd2 Mon Sep 17 00:00:00 2001
From: Hronom
Date: Thu, 17 Mar 2016 22:59:06 +0200
Subject: [PATCH] Add support of Windsurfercrs

---
 .../html/parsers/WindsurfercrsHtmlParser.java | 92 +++++++++++++++++++
 .../controllers/ScrapeButtonController.java   | 18 +++++
 .../WebsiteUrlTypingController.java           |  2 +
 .../dat/rooms/view/views/ScrapeView.java      |  3 +-
 4 files changed, 114 insertions(+), 1 deletion(-)
 create mode 100755 scrape-dat-rooms-core/src/main/java/com/github/hronom/scrape/dat/rooms/core/html/parsers/WindsurfercrsHtmlParser.java

diff --git a/scrape-dat-rooms-core/src/main/java/com/github/hronom/scrape/dat/rooms/core/html/parsers/WindsurfercrsHtmlParser.java b/scrape-dat-rooms-core/src/main/java/com/github/hronom/scrape/dat/rooms/core/html/parsers/WindsurfercrsHtmlParser.java
new file mode 100755
index 0000000..5ecc680
--- /dev/null
+++ b/scrape-dat-rooms-core/src/main/java/com/github/hronom/scrape/dat/rooms/core/html/parsers/WindsurfercrsHtmlParser.java
@@ -0,0 +1,92 @@
+package com.github.hronom.scrape.dat.rooms.core.html.parsers;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.nio.file.Path;
+import java.util.ArrayList;
+
+/**
+ * Extracts room information (photo, rate, amenities) from Windsurfercrs HTML.
+ */
+public class WindsurfercrsHtmlParser implements HtmlParser {
+    private static final Logger logger = LogManager.getLogger();
+
+    private final String baseUri = "https://res.windsurfercrs.com";
+
+    /**
+     * Parses every room article found inside the results container.
+     *
+     * @param html page markup to parse
+     * @param downloader used to fetch room photos
+     * @return parsed rooms, or {@code null} if the results container is absent
+     */
+    @Override
+    public ArrayList<RoomInfo> parse(String html, RoomPhotoDownloader downloader) {
+        ArrayList<RoomInfo> results = new ArrayList<>();
+
+        // The base URI lets absUrl() resolve relative image links.
+        Document doc = Jsoup.parse(html, baseUri);
+        Element roomsContainerElement =
+            doc.select("div[id=\"dvWsResultRooms\"][class=\"ws-results\"]").first();
+        if (roomsContainerElement != null) {
+            Elements roomsElements = roomsContainerElement.select("article[id^=\"ws-rsrm-\"]");
+            for (Element roomElement : roomsElements) {
+                RoomInfo roomInfo = parseRoom(roomElement, downloader);
+                results.add(roomInfo);
+            }
+        }
+        else {
+            logger.error("Not valid HTML for Windsurfercrs website!");
+            return null;
+        }
+
+        return results;
+    }
+
+    /** Builds a RoomInfo from one room element: photo, rate and amenities. */
+    private RoomInfo parseRoom(Element element, RoomPhotoDownloader downloader) {
+        RoomInfo roomInfo = new RoomInfo();
+        parsePhoto(element, roomInfo, downloader);
+        parseRate(element, roomInfo);
+        parseAmenities(element, roomInfo);
+        return roomInfo;
+    }
+
+    /** Downloads the room photo and stores its local path, skipping "default.png" URLs. */
+    private void parsePhoto(Element element, RoomInfo roomInfo, RoomPhotoDownloader downloader) {
+        // The photo is the first <img class="coverme"> inside the room element.
+        Element photoElement = element.select("img[class=\"coverme\"]").first();
+        if (photoElement != null) {
+            String photoUrl = photoElement.absUrl("src");
+            if (!photoUrl.contains("default.png")) {
+                Path savePath = downloader.download(photoUrl);
+                if (savePath != null) {
+                    roomInfo.roomPhotoPath = savePath.toString();
+                }
+            }
+        }
+    }
+
+    /** Sets the rate to the "ref" attribute of the first ws-number span followed by '$'. */
+    private void parseRate(Element element, RoomInfo roomInfo) {
+        // The rate is read from the first <span class="ws-number">.
+        Element currencyElement = element.select("span[class=\"ws-number\"]").first();
+        if (currencyElement != null) {
+            roomInfo.rate = currencyElement.attr("ref") + '$';
+        }
+    }
+
+    /** Stores the trimmed own text of the first h1 as the amenities. */
+    private void parseAmenities(Element element, RoomInfo roomInfo) {
+        // Only the h1's own text is used; text of nested elements is ignored.
+        Element amenitiesElement = element.select("h1").first();
+        if (amenitiesElement != null) {
+            roomInfo.amenities = amenitiesElement.ownText().trim();
+        }
+    }
+}
diff --git a/scrape-dat-rooms-view/src/main/java/com/github/hronom/scrape/dat/rooms/view/controllers/ScrapeButtonController.java b/scrape-dat-rooms-view/src/main/java/com/github/hronom/scrape/dat/rooms/view/controllers/ScrapeButtonController.java
index cd096d5..74a8494 100755
--- a/scrape-dat-rooms-view/src/main/java/com/github/hronom/scrape/dat/rooms/view/controllers/ScrapeButtonController.java
+++ b/scrape-dat-rooms-view/src/main/java/com/github/hronom/scrape/dat/rooms/view/controllers/ScrapeButtonController.java
@@ -9,6 +9,7 @@
 import com.github.hronom.scrape.dat.rooms.core.html.parsers.RedRoofHtmlParser;
 import com.github.hronom.scrape.dat.rooms.core.html.parsers.RoomInfo;
 import com.github.hronom.scrape.dat.rooms.core.html.parsers.RoomPhotoDownloader;
+import com.github.hronom.scrape.dat.rooms.core.html.parsers.WindsurfercrsHtmlParser;
 import com.github.hronom.scrape.dat.rooms.core.html.parsers.utils.NetworkUtils;
 import com.github.hronom.scrape.dat.rooms.core.html.parsers.utils.PathsUtils;
 import com.github.hronom.scrape.dat.rooms.core.webpage.html.grabbers.Grabber;
@@ -57,6 +58,10 @@ public class ScrapeButtonController {
     private final Path ebookersResultsPhotosDir = ebookersResultsDir.resolve("photos");
     private final EbookersHtmlParser ebookersHtmlParser = new EbookersHtmlParser();
 
+    private final Path windsurfercrsResultsDir = resultsPath.resolve("windsurfercrs");
+    private final Path windsurfercrsResultsPhotosDir = windsurfercrsResultsDir.resolve("photos");
+    private final WindsurfercrsHtmlParser windsurfercrsHtmlParser = new WindsurfercrsHtmlParser();
+
     public ScrapeButtonController(ScrapeView scrapeViewArg) {
         scrapeView = scrapeViewArg;
         scrapeView.addScrapeButtonActionListener(createScrapeButtonActionListener());
@@ -176,6 +181,19 @@ public void run() {
                                     }
                                     break;
                                 }
+                                case windsurfercrs: {
+                                    prepareFolder(
+                                        windsurfercrsResultsDir,
+                                        windsurfercrsResultsPhotosDir
+                                    );
+                                    RoomPhotoDownloader downloader =
+                                        createRoomPhotoDownloader(windsurfercrsResultsPhotosDir);
+                                    roomInfos = windsurfercrsHtmlParser.parse(html, downloader);
+                                    if (roomInfos != null) {
+                                        save(roomInfos, windsurfercrsResultsDir);
+                                    }
+                                    break;
+                                }
                                 default: {
                                     logger
                                         .error("Unknown browser engine: " + selectedBrowserEngine);
diff --git a/scrape-dat-rooms-view/src/main/java/com/github/hronom/scrape/dat/rooms/view/controllers/WebsiteUrlTypingController.java b/scrape-dat-rooms-view/src/main/java/com/github/hronom/scrape/dat/rooms/view/controllers/WebsiteUrlTypingController.java
index b68e4ab..903264e 100644
--- a/scrape-dat-rooms-view/src/main/java/com/github/hronom/scrape/dat/rooms/view/controllers/WebsiteUrlTypingController.java
+++ b/scrape-dat-rooms-view/src/main/java/com/github/hronom/scrape/dat/rooms/view/controllers/WebsiteUrlTypingController.java
@@ -29,6 +29,8 @@ public void insertUpdate(DocumentEvent e) {
                         scrapeView.selectParser(ScrapeView.Parser.RedLion);
                     } else if (str.contains("ebookers.com")) {
                         scrapeView.selectParser(ScrapeView.Parser.ebookers);
+                    } else if (str.contains("windsurfercrs.com")) {
+                        scrapeView.selectParser(ScrapeView.Parser.windsurfercrs);
                     }
                 } catch (BadLocationException exception) {
                     logger.error(exception);
diff --git a/scrape-dat-rooms-view/src/main/java/com/github/hronom/scrape/dat/rooms/view/views/ScrapeView.java b/scrape-dat-rooms-view/src/main/java/com/github/hronom/scrape/dat/rooms/view/views/ScrapeView.java
index c10aa0d..d6360a1 100755
--- a/scrape-dat-rooms-view/src/main/java/com/github/hronom/scrape/dat/rooms/view/views/ScrapeView.java
+++ b/scrape-dat-rooms-view/src/main/java/com/github/hronom/scrape/dat/rooms/view/views/ScrapeView.java
@@ -52,7 +52,8 @@ public enum Parser {
         Motel6,
         RedRoof,
         RedLion,
-        ebookers
+        ebookers,
+        windsurfercrs
     }
 
     public ScrapeView() {