bengaluru-food-census/parse_listing.rb

28 lines
601 B
Ruby
Raw Normal View History

2018-01-01 19:15:51 +00:00
require 'nokogiri'
2018-01-14 20:31:40 +00:00
require 'csv'
2018-03-30 08:20:03 +00:00
require 'date'
2018-01-01 19:15:51 +00:00
# Parse every saved listing page matching html/restaurants-*.html and write
# one CSV row per listing to data/<today's date>.csv (the date is rendered by
# Date#to_s via interpolation).
#
# Each element matching `.plr10` is treated as one listing:
#   * first link   -> url (its href) and title (its text)
#   * second link  -> location
#   * first span   -> cuisine
#   * second span  -> address
#
# NOTE(review): assumes every `.plr10` element contains at least two links and
# two spans; an element missing one raises NoMethodError on `.text` — confirm
# against the saved markup.
# NOTE(review): the data/ directory is presumably expected to exist already;
# nothing here creates it.
CSV.open("data/#{Date.today}.csv", 'wb') do |csv|
  csv << ['url', 'title', 'location', 'address', 'cuisine']

  Dir.glob('html/restaurants-*.html') do |file|
    # Block-form File.open closes the handle as soon as parsing is done; a
    # bare `open(file)` leaves one descriptor open per page and, being
    # Kernel#open, would also treat a path starting with "|" as a command.
    page = File.open(file) { |io| Nokogiri::HTML(io) }

    page.css('.plr10').each do |div|
      links = div.css('a')
      spans = div.css('span')

      title    = links[0].text
      location = links[1].text
      address  = spans[1].text
      cuisine  = spans[0].text
      url      = links[0]['href']

      csv << [url, title, location, address, cuisine]
      puts url # progress output: one URL per row written
    end
  end
end