bengaluru-food-census/parse_listing.rb

27 lines
571 B
Ruby
Raw Normal View History

2018-01-01 19:15:51 +00:00
require 'nokogiri'
2018-01-14 20:31:40 +00:00
require 'csv'
2018-01-01 19:15:51 +00:00
restaurants = []
2018-01-14 20:31:40 +00:00
CSV.open('database.csv', 'wb') do |csv|
csv << ['url', 'title', 'location', 'address', 'cuisine']
Dir.glob('html/restaurants-*.html') do |file|
2018-01-01 19:15:51 +00:00
page = Nokogiri::HTML(open(file))
page.css('.plr10').each do |div|
links = div.css('a')
spans = div.css('span')
title = links[0].text
location = links[1].text
address = spans[1].text
cuisine = spans[0].text
url = links[0]['href']
csv << [url, title, location, address, cuisine]
puts url
2018-01-01 19:15:51 +00:00
end
end
end