Now, although it takes 5GB to run this once, we get a more detailed CSV
This commit is contained in:
parent
1e67eb8281
commit
5921640da3
|
@ -4,25 +4,9 @@ require 'date'
|
|||
|
||||
restaurants = []
|
||||
|
||||
CSV.open("data/#{Date.today.to_s}.csv", 'wb') do |csv|
|
||||
csv << ['url', 'title', 'location', 'address', 'cuisine']
|
||||
Dir.glob('html/restaurants-*.html') do |file|
|
||||
page = Nokogiri::HTML(open(file))
|
||||
|
||||
page.css('.plr10').each do |div|
|
||||
links = div.css('a')
|
||||
spans = div.css('span')
|
||||
|
||||
|
||||
title = links[0].text
|
||||
location = links[1].text
|
||||
address = spans[1].text
|
||||
cuisine = spans[0].text
|
||||
url = links[0]['href']
|
||||
|
||||
csv << [url, title, location, address, cuisine]
|
||||
|
||||
puts url
|
||||
end
|
||||
Dir.glob('html/restaurants-*.html') do |file|
|
||||
page = Nokogiri::HTML(open(file))
|
||||
page.css('.plr10').each do |div|
|
||||
puts div.css('a')[0]['href']
|
||||
end
|
||||
end
|
|
@ -7,7 +7,7 @@ require "csv"
|
|||
|
||||
restaurants = []
|
||||
|
||||
csv = CSV.open("data/#{Date.today.to_s}-all.csv", 'w')
|
||||
csv = CSV.open("data/#{Date.today.to_s}.csv", 'w')
|
||||
|
||||
first = true
|
||||
Dir.glob('html/restaurants/*.html') do |file|
|
||||
|
@ -59,9 +59,11 @@ Dir.glob('html/restaurants/*.html') do |file|
|
|||
end
|
||||
rest['status'] = r['sections']['SECTION_BASIC_INFO']['res_status_text']
|
||||
if ratings and ratings.has_key? 'rating_streak'
|
||||
rest['last_rating_timestamp'] = ratings['rating_streak'][-1]['timestamp']
|
||||
rest['last_rating_timestamp'] = Time.at(ratings['rating_streak'][-1]['timestamp']).strftime('%Y-%m-%d')
|
||||
rest['last_rating_month'] = Time.at(ratings['rating_streak'][-1]['timestamp']).strftime('%Y-%m')
|
||||
else
|
||||
rest['last_rating_timestamp'] = Date.new(2020, 01,01).strftime('%s')
|
||||
rest['last_rating_timestamp'] = '1970-01-01'
|
||||
rest['last_rating_month'] = '1970-01'
|
||||
end
|
||||
rest['id'] = data['pages']['current']['resId']
|
||||
if first
|
||||
|
|
Loading…
Reference in New Issue