diff --git a/lib/member_page.rb b/lib/member_page.rb
new file mode 100644
index 00000000..4c5c29bb
--- /dev/null
+++ b/lib/member_page.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+require 'scraped'
+
+class MemberPage < Scraped::HTML
+  field :party do
+    noko.at_css('.partyBio').text.tidy
+  end
+
+  field :phone do
+    contact_numbers_for('Tel')
+  end
+
+  field :fax do
+    contact_numbers_for('Fax')
+  end
+
+  field :tty do
+    contact_numbers_for('TTY')
+  end
+
+  private
+
+  def contact_numbers
+    noko.xpath('.//span[@class="data-type"]')
+  end
+
+  def contact_numbers_for(str)
+    contact_numbers.xpath("text()[contains(.,'#{str}')]").map do |n|
+      n.text.gsub("#{str}.", '').tidy
+    end.reject(&:empty?).join(';')
+  end
+end
diff --git a/scraper.rb b/scraper.rb
index 9df5b767..ef796f78 100644
--- a/scraper.rb
+++ b/scraper.rb
@@ -12,6 +12,10 @@
 require_rel 'lib'
 
+def scrape(h)
+  url, klass = h.to_a.first
+  klass.new(response: Scraped::Request.new(url: url).response)
+end
 
 class MembersPage < Scraped::HTML
   decorator Scraped::Response::Decorator::CleanUrls
@@ -24,12 +28,11 @@
 end
 
 start = 'http://www.tucamarapr.org/dnncamara/web/ComposiciondelaCamara/Biografia.aspx'
-page = MembersPage.new(response: Scraped::Request.new(url: start).response)
-data = page.members.map(&:to_h)
+page = scrape start => MembersPage
+data = page.members.map do |member|
+  member.to_h.merge((scrape member.source => MemberPage).to_h)
+end
 data.each { |mem| puts mem.reject { |_, v| v.to_s.empty? }.sort_by { |k, _| k }.to_h } if ENV['MORPH_DEBUG']
 ScraperWiki.sqliteexecute('DROP TABLE data') rescue nil
 ScraperWiki.save_sqlite(%i[id party area], data)
-
-# visit each 'source' page to archive it
-data.each { |p| open(p[:source]).read }