diff --git a/parse_listing.rb b/parse_listing.rb index a76553b..ad76327 100644 --- a/parse_listing.rb +++ b/parse_listing.rb @@ -4,25 +4,9 @@ require 'date' restaurants = [] -CSV.open("data/#{Date.today.to_s}.csv", 'wb') do |csv| - csv << ['url', 'title', 'location', 'address', 'cuisine'] - Dir.glob('html/restaurants-*.html') do |file| - page = Nokogiri::HTML(open(file)) - - page.css('.plr10').each do |div| - links = div.css('a') - spans = div.css('span') - - - title = links[0].text - location = links[1].text - address = spans[1].text - cuisine = spans[0].text - url = links[0]['href'] - - csv << [url, title, location, address, cuisine] - - puts url - end +Dir.glob('html/restaurants-*.html') do |file| + page = Nokogiri::HTML(open(file)) + page.css('.plr10').each do |div| + puts div.css('a')[0]['href'] end end \ No newline at end of file diff --git a/parse_restaurants.rb b/parse_restaurants.rb index d512f4c..9835184 100644 --- a/parse_restaurants.rb +++ b/parse_restaurants.rb @@ -7,7 +7,7 @@ require "csv" restaurants = [] -csv = CSV.open("data/#{Date.today.to_s}-all.csv", 'w') +csv = CSV.open("data/#{Date.today.to_s}.csv", 'w') first = true Dir.glob('html/restaurants/*.html') do |file| @@ -59,9 +59,11 @@ Dir.glob('html/restaurants/*.html') do |file| end rest['status'] = r['sections']['SECTION_BASIC_INFO']['res_status_text'] if ratings and ratings.has_key? 'rating_streak' - rest['last_rating_timestamp'] = ratings['rating_streak'][-1]['timestamp'] + rest['last_rating_timestamp'] = Time.at(ratings['rating_streak'][-1]['timestamp']).strftime('%Y-%m-%d') + rest['last_rating_month'] = Time.at(ratings['rating_streak'][-1]['timestamp']).strftime('%Y-%m') else - rest['last_rating_timestamp'] = Date.new(2020, 01,01).strftime('%s') + rest['last_rating_timestamp'] = '1970-01-01' + rest['last_rating_month'] = '1970-01' end rest['id'] = data['pages']['current']['resId'] if first