2018-01-01 19:15:51 +00:00
|
|
|
require 'nokogiri'
|
2018-01-14 20:31:40 +00:00
|
|
|
require 'csv'
|
2018-03-30 08:20:03 +00:00
|
|
|
require 'date'
|
2018-01-01 19:15:51 +00:00
|
|
|
|
|
|
|
# NOTE(review): nothing in the visible part of this script reads or appends to
# this array (rows are written straight to the CSV) — confirm it is unused
# elsewhere before removing it.
restaurants = []
|
|
|
|
|
2018-03-30 08:20:03 +00:00
|
|
|
# Builds one CSV row from a single listing container.
#
# Cells are read positionally: the first anchor supplies the url and title,
# the second anchor the location, the first span the cuisine and the second
# span the address. A missing secondary element becomes a nil (empty) cell
# instead of raising NoMethodError and aborting the whole export.
#
# @param div [#css] a node matched by the '.plr10' selector
# @return [Array, nil] [url, title, location, address, cuisine], or nil when
#   the container holds no anchor at all (nothing to record)
def restaurant_row(div)
  links = div.css('a')
  spans = div.css('span')

  primary = links[0]
  return unless primary

  [
    primary['href'],
    primary.text,
    links[1]&.text,
    spans[1]&.text, # address
    spans[0]&.text  # cuisine
  ]
end

# Writes every restaurant found in html/restaurants-*.html to a CSV file named
# after today's date, echoing each exported url to stdout.
CSV.open("#{Date.today}.csv", 'wb') do |csv|
  csv << ['url', 'title', 'location', 'address', 'cuisine']

  Dir.glob('html/restaurants-*.html') do |file|
    # Block form closes the handle as soon as parsing is done; Kernel#open
    # left one descriptor open per input file.
    page = File.open(file) { |io| Nokogiri::HTML(io) }

    page.css('.plr10').each do |div|
      row = restaurant_row(div)
      next unless row

      csv << row
      puts row.first
    end
  end
end
|