Skip to content

Commit

Permalink
Add support of Windsurfercrs
Browse files Browse the repository at this point in the history
  • Loading branch information
Hronom committed Mar 17, 2016
1 parent 422b615 commit 5a90e9c
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package com.github.hronom.scrape.dat.rooms.core.html.parsers;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.nio.file.Path;
import java.util.ArrayList;

/**
 * Parses the room search results page of the Windsurfercrs reservation site
 * into {@link RoomInfo} objects.
 *
 * <p>For every room article found on the page the parser extracts the room photo
 * (downloaded through the supplied {@link RoomPhotoDownloader}), the rate and the
 * text of the first {@code <h1>} element, which is stored as the amenities.
 */
public class WindsurfercrsHtmlParser implements HtmlParser {
    private static final Logger logger = LogManager.getLogger();

    /** Base URI used by Jsoup to resolve relative links (e.g. image sources). */
    private static final String BASE_URI = "https://res.windsurfercrs.com";

    /**
     * Parses the given HTML and returns one {@link RoomInfo} per room article.
     *
     * @param html       HTML of the Windsurfercrs results page
     * @param downloader downloader used to fetch the room photos
     * @return list of parsed rooms (possibly empty), or {@code null} when the page
     *         does not contain the expected rooms container
     */
    @Override
    public ArrayList<RoomInfo> parse(String html, RoomPhotoDownloader downloader) {
        Document doc = Jsoup.parse(html, BASE_URI);
        Element roomsContainerElement =
            doc.select("div[id=\"dvWsResultRooms\"][class=\"ws-results\"]").first();
        if (roomsContainerElement == null) {
            // The container is missing: this is not a Windsurfercrs results page.
            logger.error("Not valid HTML for Windsurfercrs website!");
            return null;
        }

        ArrayList<RoomInfo> results = new ArrayList<>();
        Elements roomsElements = roomsContainerElement.select("article[id^=\"ws-rsrm-\"]");
        for (Element roomElement : roomsElements) {
            results.add(parseRoom(roomElement, downloader));
        }
        return results;
    }

    /**
     * Builds a {@link RoomInfo} from a single room article element.
     *
     * @param element    room article element
     * @param downloader downloader used to fetch the room photo
     * @return room info filled with whatever data could be found in the element
     */
    private RoomInfo parseRoom(Element element, RoomPhotoDownloader downloader) {
        RoomInfo roomInfo = new RoomInfo();
        parsePhoto(element, roomInfo, downloader);
        parseRate(element, roomInfo);
        parseAmenities(element, roomInfo);
        return roomInfo;
    }

    /**
     * Downloads the room photo and stores the local path in {@code roomInfo.roomPhotoPath}.
     * Nothing is stored when there is no photo element, the URL cannot be resolved,
     * the photo is the "default.png" placeholder, or the download yields no path.
     */
    private void parsePhoto(Element element, RoomInfo roomInfo, RoomPhotoDownloader downloader) {
        // Expected markup: <img src="..." class="coverme" alt="Room">
        Element photoElement = element.select("img[class=\"coverme\"]").first();
        if (photoElement == null) {
            return;
        }

        // absUrl() returns an empty string when "src" is missing or cannot be made absolute.
        String photoUrl = photoElement.absUrl("src");
        if (photoUrl.isEmpty() || photoUrl.contains("default.png")) {
            return;
        }

        Path savePath = downloader.download(photoUrl);
        if (savePath != null) {
            roomInfo.roomPhotoPath = savePath.toString();
        }
    }

    /**
     * Reads the rate from the "ref" attribute of the first "ws-number" span and stores it,
     * followed by a '$' sign, in {@code roomInfo.rate}.
     */
    private void parseRate(Element element, RoomInfo roomInfo) {
        // Expected markup: <span class="ws-number" ref="55.00">$55</span>
        Element currencyElement = element.select("span[class=\"ws-number\"]").first();
        if (currencyElement != null) {
            roomInfo.rate = currencyElement.attr("ref") + '$';
        }
    }

    /**
     * Stores the trimmed own text of the first {@code <h1>} element in
     * {@code roomInfo.amenities}.
     */
    private void parseAmenities(Element element, RoomInfo roomInfo) {
        // Expected markup: <h1>
        Element amenitiesElement = element.select("h1").first();
        if (amenitiesElement != null) {
            roomInfo.amenities = amenitiesElement.ownText().trim();
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import com.github.hronom.scrape.dat.rooms.core.html.parsers.RedRoofHtmlParser;
import com.github.hronom.scrape.dat.rooms.core.html.parsers.RoomInfo;
import com.github.hronom.scrape.dat.rooms.core.html.parsers.RoomPhotoDownloader;
import com.github.hronom.scrape.dat.rooms.core.html.parsers.WindsurfercrsHtmlParser;
import com.github.hronom.scrape.dat.rooms.core.html.parsers.utils.NetworkUtils;
import com.github.hronom.scrape.dat.rooms.core.html.parsers.utils.PathsUtils;
import com.github.hronom.scrape.dat.rooms.core.webpage.html.grabbers.Grabber;
Expand Down Expand Up @@ -57,6 +58,10 @@ public class ScrapeButtonController {
private final Path ebookersResultsPhotosDir = ebookersResultsDir.resolve("photos");
private final EbookersHtmlParser ebookersHtmlParser = new EbookersHtmlParser();

// Windsurfercrs: results folder ("windsurfercrs" under resultsPath), its "photos"
// subfolder (the one handed to the room photo downloader) and the parser instance.
private final Path windsurfercrsResultsDir = resultsPath.resolve("windsurfercrs");
private final Path windsurfercrsResultsPhotosDir = windsurfercrsResultsDir.resolve("photos");
private final WindsurfercrsHtmlParser windsurfercrsHtmlParser = new WindsurfercrsHtmlParser();

public ScrapeButtonController(ScrapeView scrapeViewArg) {
scrapeView = scrapeViewArg;
scrapeView.addScrapeButtonActionListener(createScrapeButtonActionListener());
Expand Down Expand Up @@ -176,6 +181,19 @@ public void run() {
}
break;
}
case windsurfercrs: {
// Windsurfercrs parser selected: prepare its folders, parse the page, save the rooms.
// NOTE(review): prepareFolder presumably creates/cleans both folders - confirm.
prepareFolder(
windsurfercrsResultsDir,
windsurfercrsResultsPhotosDir
);
// Downloader created for the Windsurfercrs photos folder.
RoomPhotoDownloader downloader =
createRoomPhotoDownloader(windsurfercrsResultsPhotosDir);
roomInfos = windsurfercrsHtmlParser.parse(html, downloader);
// parse() returns null when the page lacks the expected rooms container;
// in that case nothing is saved.
if (roomInfos != null) {
save(roomInfos, windsurfercrsResultsDir);
}
break;
}
default: {
logger
.error("Unknown browser engine: " + selectedBrowserEngine);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ public void insertUpdate(DocumentEvent e) {
scrapeView.selectParser(ScrapeView.Parser.RedLion);
} else if (str.contains("ebookers.com")) {
scrapeView.selectParser(ScrapeView.Parser.ebookers);
} else if (str.contains("windsurfercrs.com")) {
scrapeView.selectParser(ScrapeView.Parser.windsurfercrs);
}
} catch (BadLocationException exception) {
logger.error(exception);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ public enum Parser {
Motel6,
RedRoof,
RedLion,
ebookers
ebookers,
windsurfercrs
}

public ScrapeView() {
Expand Down

0 comments on commit 5a90e9c

Please sign in to comment.