-
Notifications
You must be signed in to change notification settings - Fork 0
/
rvest example.R
55 lines (39 loc) · 1.29 KB
/
rvest example.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Install rvest only when it is missing, so that re-running the script does
# not re-download and re-install the package every time.
if (!requireNamespace("rvest", quietly = TRUE)) {
  install.packages("rvest")
}
library(rvest)

# List the demos bundled with rvest, then run the "tripadvisor" demo.
# NOTE(review): newer rvest releases may no longer ship demos -- confirm.
demo(package = "rvest")
demo(package = "rvest", topic = "tripadvisor")

# Download and parse the food-assistance locations page; every extraction
# step further down works on this parsed document.
locations_page <- read_html(
  "http://www.secondharvestmetrolina.org/agencies/Get-Food-Assistance"
)
# Method 1: take the first <table> on the page and convert it to a data
# frame. This version includes the phone number, but the text is messy.
head(
  html_table(
    html_nodes(locations_page, "table")[[1]]
  )
)
# Method 2: read each field from the data-* attributes of the listing nodes,
# then assemble the fields into a data frame.

# Select the listing nodes once and reuse them for every attribute below,
# instead of re-running the same CSS query for each column.
listings <- locations_page %>%
  html_nodes(".xmp-location-listing")

# Show the raw markup of the first listing to see which attributes it has.
one_example <- listings[1]
as.character(one_example)

# One value per listing node for each attribute.
title <- html_attr(listings, "data-title")
latitude <- html_attr(listings, "data-latitude")
longitude <- html_attr(listings, "data-longitude")
description <- html_attr(listings, "data-description")
id <- html_attr(listings, "data-id")

# Assemble the parts. stringsAsFactors = FALSE keeps the scraped text as
# character columns on R versions before 4.0 as well.
df <- data.frame(
  title, latitude, longitude, description, id,
  stringsAsFactors = FALSE
)
df
# XPath method: select every element whose class list contains "findLoc",
# together with every <td> cell, and preview the text of the first matches.
head(
  html_text(
    html_nodes(
      locations_page,
      xpath = '//*[contains(concat( " ", @class, " " ), concat( " ", "findLoc", " " ))] | //td'
    )
  )
)