-
Notifications
You must be signed in to change notification settings - Fork 1
/
scraper.rb
57 lines (44 loc) · 1.16 KB
/
scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/bin/env ruby
# encoding: utf-8
# frozen_string_literal: true
require 'pry'
require 'scraped'
require 'scraperwiki'
# require 'open-uri/cached'
# OpenURI::Cache.cache_path = '.cache'
require 'scraped_page_archive/open-uri'
# Listing page: exposes every `.uk-overlay` element as a MemberBox fragment.
class MembersList < Scraped::HTML
  decorator Scraped::Response::Decorator::AbsoluteUrls

  # One MemberBox per `.uk-overlay` node found in the document.
  field :members do
    noko.css('.uk-overlay').map { |overlay| fragment(overlay => MemberBox) }
  end
end
# A single member's overlay box, as handed over by the listing page.
class MemberBox < Scraped::HTML
  # Text nodes directly under the content area's <p>, tidied, with the first
  # 'Hon. ' removed.
  field :name do
    box.xpath('p/text()').text.tidy.sub('Hon. ', '')
  end

  # Last '|'-separated segment of the <small> text inside the <p>.
  # `to_s` guards the empty case: ''.split('|').last is nil, and calling
  # `tidy` on nil would raise NoMethodError and abort the whole scrape.
  # A box without that text now yields an empty string instead.
  field :area do
    box.xpath('p/small').text.split('|').last.to_s.tidy
  end

  # `src` attribute of the <img> inside this fragment.
  field :image do
    noko.css('img/@src').text
  end

  # `href` of the link inside the <p>; empty when the box has no link.
  field :source do
    box.css('p a/@href').text
  end

  private

  # The content area of the overlay that holds the name, area and link.
  def box
    noko.css('.uk-overlay-area-content')
  end
end
# Fetch the listing page, turn each member into a hash, and store the rows.
url = 'https://www.gov.tc/index.php/government/house-of-assembly'
response = Scraped::Request.new(url: url).response
page = MembersList.new(response: response)

data = page.members.map do |member|
  row = member.to_h
  # Not all members have individual links
  row[:source] = url if row[:source].to_s.empty?
  row
end
# puts data

# Best-effort drop of the existing table; any StandardError is deliberately
# ignored.
begin
  ScraperWiki.sqliteexecute('DROP TABLE data')
rescue StandardError
  nil
end
ScraperWiki.save_sqlite(%i[name area], data)