Initial commit

This commit is contained in:
Nemo 2018-01-02 00:19:07 +05:30
commit 99924285c5
6 changed files with 70 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
vendor/
html/

4
Gemfile Normal file
View File

@ -0,0 +1,4 @@
# Resolve every gem below from the public RubyGems index.
source "https://rubygems.org"

# HTTP/2 client library.
# NOTE(review): not referenced by the scripts visible in this commit - confirm it is still needed.
gem "net-http2"

# HTML parser; parse_dir.rb requires it to read the downloaded directory page.
gem "nokogiri"

19
Gemfile.lock Normal file
View File

@ -0,0 +1,19 @@
GEM
remote: https://rubygems.org/
specs:
http-2 (0.8.4)
mini_portile2 (2.3.0)
net-http2 (0.16.0)
http-2 (= 0.8.4)
nokogiri (1.8.1)
mini_portile2 (~> 2.3.0)
PLATFORMS
ruby
DEPENDENCIES
net-http2
nokogiri
BUNDLED WITH
1.16.1

14
README.md Normal file
View File

@ -0,0 +1,14 @@
# bengaluru-food-census
Keep track of restaurant openings and closures in the city.
# Quirk
Zomato does not support HTTP/1.1, so wget can't be used.
# Features
- Keep track of historical data
- Does not use the API (since the rate-limit is too low at 1k/day)
+ We need to check the status of around 8k restaurants

20
bootstrap.sh Executable file
View File

@ -0,0 +1,20 @@
#!/bin/bash
# Configuration for the download script: which site and city to fetch,
# and the User-Agent header sent with every request.
USER_AGENT="Mozilla/Gecko/Firefox/58.0"
ZOMATO_CITY="bangalore"
ZOMATO_ROOT_URL="https://www.zomato.com"

# URL of the city's directory page, built from the root URL and the city name.
DIRECTORY_URL="${ZOMATO_ROOT_URL}/${ZOMATO_CITY}/directory"

# Downloaded pages are written under html/; create it if it does not exist yet.
mkdir -p html
# Download a URL into the html/ directory.
#   $1 - URL to fetch
#   $2 - file name (relative to html/) that receives the response body
# Prints a "[+] <file name>" progress line to stdout before fetching.
# The function's exit status is curl's exit status.
function dl() {
  echo "[+] $2"
  # -sS: no progress meter, but still report errors.
  # --http2-prior-knowledge: speak HTTP/2 directly, without an HTTP/1.1 upgrade.
  # --compressed: request a compressed response and decompress it on the fly.
  # The URL is quoted so it is passed to curl as a single argument and is not
  # subject to word splitting or glob expansion (e.g. a '?' or '*' in the URL).
  curl -sS --http2-prior-knowledge --compressed -H "User-Agent: $USER_AGENT" "$1" > "html/$2"
}
# Fetch the directory index first; parse_dir.rb reads html/directory.html.
dl "$DIRECTORY_URL" "directory.html"

# Download every page whose URL parse_dir.rb prints (one URL per line).
# The loop reads from a process substitution rather than an unquoted
# here-string: older bash releases word-split an unquoted here-string, which
# collapses the newline-separated URLs into a single line, and a here-string
# always yields one (empty) line even when the parser prints nothing.
while read -r LINK; do
  # Ignore blank lines so we never call dl with an empty URL.
  [ -n "$LINK" ] || continue
  # Name the local file after the last path component of the URL.
  FILENAME="$(basename "$LINK").html"
  dl "$LINK" "$FILENAME"
done < <(bundle exec ruby parse_dir.rb)

11
parse_dir.rb Normal file
View File

@ -0,0 +1,11 @@
require 'nokogiri'
# Prints, one per line, the href of every anchor in the downloaded directory
# page that points at a "restaurants-" listing page of the Bangalore directory.

# Location of the previously downloaded directory page.
DIRECTORY_HTML = 'html/directory.html'

# Only links starting with this prefix are printed.
LISTING_PREFIX = 'https://www.zomato.com/bangalore/directory/restaurants-'

# The block form of File.open closes the file handle as soon as parsing is
# done, instead of leaving it open for the rest of the script.
page = File.open(DIRECTORY_HTML) { |file| Nokogiri::HTML(file) }

page.css('a').each do |link|
  href = link['href']
  # Anchors without an href attribute yield nil and are skipped.
  # start_with? avoids a hard-coded slice length that has to be kept in sync
  # with the prefix by hand.
  puts href if href && href.start_with?(LISTING_PREFIX)
end