Skip to content

Commit

Permalink
Add MemberPage
Browse files Browse the repository at this point in the history
  • Loading branch information
Oliver Denman committed Jul 7, 2017
1 parent dca9f68 commit 60de314
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 5 deletions.
33 changes: 33 additions & 0 deletions lib/member_page.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# frozen_string_literal: true

require 'scraped'

# Scraper for a single member's page. Exposes the member's party plus any
# telephone, fax and TTY numbers found in the page's contact section.
#
# NOTE(review): `field` and `noko` come from Scraped::HTML and `String#tidy`
# is not defined in this file — presumably all are provided by the `scraped`
# gem; confirm against its documentation.
class MemberPage < Scraped::HTML
  # Text of the first element carrying the `partyBio` CSS class.
  # NOTE(review): if no such element exists `at_css` presumably returns nil
  # and this raises NoMethodError — confirm that failing loudly is intended.
  field :party do
    noko.at_css('.partyBio').text.tidy
  end

  # Each contact field is the ';'-joined list of numbers labelled with the
  # given prefix ('Tel', 'Fax' or 'TTY'); empty string when none are found.
  field(:phone) { contact_numbers_for('Tel') }

  field(:fax) { contact_numbers_for('Fax') }

  field(:tty) { contact_numbers_for('TTY') }

  private

  # All <span class="data-type"> nodes beneath the page root.
  def contact_numbers
    noko.xpath('.//span[@class="data-type"]')
  end

  # Collects the text nodes inside the contact spans that contain +str+,
  # strips the "<str>." label from each, tidies the remainder, discards
  # blanks and joins what is left with ';'.
  def contact_numbers_for(str)
    label = "#{str}."
    labelled_nodes = contact_numbers.xpath("text()[contains(.,'#{str}')]")
    numbers = labelled_nodes.map { |node| node.text.gsub(label, '').tidy }
    numbers.reject(&:empty?).join(';')
  end
end
13 changes: 8 additions & 5 deletions scraper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@

require_rel 'lib'

# Fetches a URL and wraps the response in a page object.
#
# +h+ is a single-entry mapping of `url => page_class`; only the first pair
# is used. The URL is requested through Scraped::Request and the resulting
# response is handed to `page_class.new(response: ...)`, whose instance is
# returned.
def scrape(h)
  url, klass = h.first
  response = Scraped::Request.new(url: url).response
  klass.new(response: response)
end

class MembersPage < Scraped::HTML
decorator Scraped::Response::Decorator::CleanUrls
Expand All @@ -24,12 +28,11 @@ class MembersPage < Scraped::HTML
end

# Entry point of the scraper: fetch the members listing, enrich every member
# with the fields from their individual page, then store the rows.
start = 'http://www.tucamarapr.org/dnncamara/web/ComposiciondelaCamara/Biografia.aspx'
# NOTE(review): `page` and `data` are each assigned twice below. The first
# pair of assignments looks like the pre-commit version of the code shown by
# the diff view; the second pair overwrites them — confirm which is current.
page = MembersPage.new(response: Scraped::Request.new(url: start).response)
data = page.members.map(&:to_h)
page = scrape start => MembersPage
# Merge each member's listing row with the hash scraped from the page at
# `member.source`; on key clashes the MemberPage values win (Hash#merge).
data = page.members.map do |member|
member.to_h.merge((scrape member.source => MemberPage).to_h)
end
# When MORPH_DEBUG is set, print every record with blank values removed and
# keys sorted.
data.each { |mem| puts mem.reject { |_, v| v.to_s.empty? }.sort_by { |k, _| k }.to_h } if ENV['MORPH_DEBUG']

# Drop any existing table first; `rescue nil` swallows the error raised
# when that statement fails (presumably because the table does not exist yet).
ScraperWiki.sqliteexecute('DROP TABLE data') rescue nil
# NOTE(review): the first argument is presumably the list of unique-key
# columns — confirm against the ScraperWiki library documentation.
ScraperWiki.save_sqlite(%i[id party area], data)

# Visit each 'source' page to archive it: the body is read and discarded.
# NOTE(review): `open` on a URL presumably relies on open-uri being required
# outside this view — confirm.
data.each { |p| open(p[:source]).read }

0 comments on commit 60de314

Please sign in to comment.