Although a single run now takes 5GB, we get a more detailed CSV
This commit is contained in:
parent
1e67eb8281
commit
5921640da3
|
@ -4,25 +4,9 @@ require 'date'
|
||||||
|
|
||||||
restaurants = []
|
restaurants = []
|
||||||
|
|
||||||
CSV.open("data/#{Date.today.to_s}.csv", 'wb') do |csv|
|
Dir.glob('html/restaurants-*.html') do |file|
|
||||||
csv << ['url', 'title', 'location', 'address', 'cuisine']
|
page = Nokogiri::HTML(open(file))
|
||||||
Dir.glob('html/restaurants-*.html') do |file|
|
page.css('.plr10').each do |div|
|
||||||
page = Nokogiri::HTML(open(file))
|
puts div.css('a')[0]['href']
|
||||||
|
|
||||||
page.css('.plr10').each do |div|
|
|
||||||
links = div.css('a')
|
|
||||||
spans = div.css('span')
|
|
||||||
|
|
||||||
|
|
||||||
title = links[0].text
|
|
||||||
location = links[1].text
|
|
||||||
address = spans[1].text
|
|
||||||
cuisine = spans[0].text
|
|
||||||
url = links[0]['href']
|
|
||||||
|
|
||||||
csv << [url, title, location, address, cuisine]
|
|
||||||
|
|
||||||
puts url
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
|
@ -7,7 +7,7 @@ require "csv"
|
||||||
|
|
||||||
restaurants = []
|
restaurants = []
|
||||||
|
|
||||||
csv = CSV.open("data/#{Date.today.to_s}-all.csv", 'w')
|
csv = CSV.open("data/#{Date.today.to_s}.csv", 'w')
|
||||||
|
|
||||||
first = true
|
first = true
|
||||||
Dir.glob('html/restaurants/*.html') do |file|
|
Dir.glob('html/restaurants/*.html') do |file|
|
||||||
|
@ -59,9 +59,11 @@ Dir.glob('html/restaurants/*.html') do |file|
|
||||||
end
|
end
|
||||||
rest['status'] = r['sections']['SECTION_BASIC_INFO']['res_status_text']
|
rest['status'] = r['sections']['SECTION_BASIC_INFO']['res_status_text']
|
||||||
if ratings and ratings.has_key? 'rating_streak'
|
if ratings and ratings.has_key? 'rating_streak'
|
||||||
rest['last_rating_timestamp'] = ratings['rating_streak'][-1]['timestamp']
|
rest['last_rating_timestamp'] = Time.at(ratings['rating_streak'][-1]['timestamp']).strftime('%Y-%m-%d')
|
||||||
|
rest['last_rating_month'] = Time.at(ratings['rating_streak'][-1]['timestamp']).strftime('%Y-%m')
|
||||||
else
|
else
|
||||||
rest['last_rating_timestamp'] = Date.new(2020, 01,01).strftime('%s')
|
rest['last_rating_timestamp'] = '1970-01-01'
|
||||||
|
rest['last_rating_month'] = '1970-01'
|
||||||
end
|
end
|
||||||
rest['id'] = data['pages']['current']['resId']
|
rest['id'] = data['pages']['current']['resId']
|
||||||
if first
|
if first
|
||||||
|
|
Loading…
Reference in New Issue