diff --git a/Gemfile.lock b/Gemfile.lock
index 9da8ee1..046fe6a 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -2,7 +2,7 @@ GEM
   remote: https://rubygems.org/
   specs:
     mini_portile2 (2.3.0)
-    nokogiri (1.8.1)
+    nokogiri (1.8.4)
       mini_portile2 (~> 2.3.0)
 
 PLATFORMS
@@ -12,4 +12,4 @@ DEPENDENCIES
   nokogiri
 
 BUNDLED WITH
-   1.16.1
+   1.16.5
diff --git a/README.md b/README.md
index f33bf3b..3a585a1 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ Keep track of restaurant openings and closures in the city.
 
 # Quirks
 
-- Zomato does not support HTTP/1.1, so wget can't be used. 
+- Zomato does not support HTTP/1.1, so wget can't be used.
 
 # Tech
 
@@ -12,13 +12,12 @@ This project uses GNU Parallel, Ruby, Nokogiri, and curl.
 
 # Features
 
-- Keep track of historical data using regularly generated CSV files 
-- Does not use the API (since the rate-limit is too low at 1k/day) 
-  + We need to checkout around 15k restaurant status (closed or not) 
-- Keep track of whether restaurant is still alive or not 
-- Tweet any restaurant closures (or any new openings) 
- 
+- Keep track of historical data using regularly generated CSV files
+- Does not use the API (since the rate-limit is too low at 1k/day)
+  - We need to checkout around 15k restaurant status (closed or not)
+- Keep track of whether restaurant is still alive or not
+- Tweet any restaurant closures (or any new openings)
 
 For now, run the following command to get a diff of new restaurants not in the old listings:
 
-`q -d , "SELECT * from ./2018-MM-DD.csv WHERE c1 not in (SELECT c1 from 2018-MM-DD.csv)"`
\ No newline at end of file
+`q -d , "SELECT * from ./2018-MM-DD.csv WHERE c1 not in (SELECT c1 from 2018-MM-DD.csv)"`
diff --git a/stats.rb b/stats.rb
new file mode 100644
index 0000000..1d18ef1
--- /dev/null
+++ b/stats.rb
@@ -0,0 +1,46 @@
+require 'csv'
+require 'set'
+
+# Builds stats.csv from the snapshots in data/*.csv: one row per snapshot
+# file, one column per locality, each cell the number of rows seen there.
+
+# City suffix stripped so "X, Bangalore" and "X" are counted as one locality.
+CITY_SUFFIX = ', Bangalore'
+
+data = {}
+locales = Set.new
+
+# Dir.glob does not promise any ordering; sort so the output rows follow the
+# file names (date order for the YYYY-MM-DD.csv naming used in the README).
+Dir.glob('data/*.csv').sort.each do |file|
+  date_key = File.basename(file, '.csv')
+  census = data[date_key] = Hash.new(0)
+  puts file
+  begin
+    CSV.foreach(file, headers: true) do |row|
+      locale = row[2]
+      # A short or blank row has no third column. Skip just that row; a nil
+      # here would raise and abort the rest of the file.
+      next if locale.nil?
+
+      # chomp with an argument removes the suffix only when it is present.
+      locale = locale.chomp(CITY_SUFFIX)
+      locales << locale
+      census[locale] += 1
+    end
+  rescue StandardError => e
+    # Best effort: keep what was counted and continue with the next file,
+    # but say so. Rescuing Exception would also swallow Ctrl-C and exit.
+    warn "#{file}: #{e.class}: #{e.message}"
+  end
+end
+
+# Fixed, sorted column order shared by the header and every data row.
+locales = locales.to_a.sort
+CSV.open('stats.csv', 'wb') do |csv|
+  csv << ['date', *locales]
+  data.each do |date, census|
+    # fetch(..., 0): a locality absent from this snapshot counts as zero.
+    csv << [date, *locales.map { |locale| census.fetch(locale, 0) }]
+  end
+end