Browse Source

Initial commit

ci
Nemo 3 years ago
commit
99924285c5
6 changed files with 70 additions and 0 deletions
  1. +2
    -0
      .gitignore
  2. +4
    -0
      Gemfile
  3. +19
    -0
      Gemfile.lock
  4. +14
    -0
      README.md
  5. +20
    -0
      bootstrap.sh
  6. +11
    -0
      parse_dir.rb

+ 2
- 0
.gitignore View File

@ -0,0 +1,2 @@
vendor/
html/

+ 4
- 0
Gemfile View File

@ -0,0 +1,4 @@
# Gem dependencies for the scraper scripts; bootstrap.sh runs Ruby through `bundle exec`.
source 'https://rubygems.org'
# HTTP/2 client library. NOTE(review): parse_dir.rb as shown does not require it — confirm it is still needed.
gem 'net-http2'
# HTML parser required by parse_dir.rb to read html/directory.html.
gem 'nokogiri'

+ 19
- 0
Gemfile.lock View File

@ -0,0 +1,19 @@
GEM
remote: https://rubygems.org/
specs:
http-2 (0.8.4)
mini_portile2 (2.3.0)
net-http2 (0.16.0)
http-2 (= 0.8.4)
nokogiri (1.8.1)
mini_portile2 (~> 2.3.0)
PLATFORMS
ruby
DEPENDENCIES
net-http2
nokogiri
BUNDLED WITH
1.16.1

+ 14
- 0
README.md View File

@ -0,0 +1,14 @@
# bengaluru-food-census
Keep track of restaurant openings and closures in the city.
# Quirk
Zomato does not support HTTP/1.1, so wget can't be used.
# Features
- Keep track of historical data
- Does not use the API (since the rate-limit is too low at 1k/day)
+ We need to check the status of around 8k restaurants

+ 20
- 0
bootstrap.sh View File

@ -0,0 +1,20 @@
#!/bin/bash
# Downloads the Zomato directory index for one city into html/, then downloads
# every directory sub-page that parse_dir.rb extracts from that index.
ZOMATO_ROOT_URL=https://www.zomato.com
ZOMATO_CITY=bangalore
DIRECTORY_URL="$ZOMATO_ROOT_URL/$ZOMATO_CITY/directory"
USER_AGENT="Mozilla/Gecko/Firefox/58.0"

mkdir -p html

# dl URL FILENAME
# Fetches URL with curl and writes the response body to html/FILENAME.
function dl() {
  echo "[+] $2"
  # "$1" is quoted so URLs containing shell metacharacters (?, &, *) are
  # passed to curl as a single, unexpanded argument.
  curl -sS --http2-prior-knowledge --compressed -H "User-Agent: $USER_AGENT" "$1" > "html/$2"
}

dl "$DIRECTORY_URL" "directory.html"

# Process substitution feeds the parser output line by line. The previous
# unquoted here-string could be word-split by older bash versions (joining all
# links onto one line) and always yielded at least one, possibly empty, line.
while read -r LINK; do
  # Skip blank lines so we never request an empty URL or write html/.html.
  [ -n "$LINK" ] || continue
  FILENAME="$(basename "$LINK").html"
  dl "$LINK" "$FILENAME"
done < <(bundle exec ruby parse_dir.rb)

+ 11
- 0
parse_dir.rb View File

@ -0,0 +1,11 @@
# frozen_string_literal: true

# Prints, one per line, the href of every anchor in html/directory.html that
# points at a restaurant directory sub-page.
require 'nokogiri'

# Only hrefs beginning with this prefix are printed.
DIRECTORY_LINK_PREFIX = 'https://www.zomato.com/bangalore/directory/restaurants-'

# The block form of File.open closes the handle as soon as parsing finishes;
# the bare `open(...)` used previously left it open until process exit.
page = File.open('html/directory.html') { |file| Nokogiri::HTML(file) }

page.css('a').each do |link|
  href = link['href']
  # Anchors without an href yield nil, so guard before the prefix check.
  puts href if href && href.start_with?(DIRECTORY_LINK_PREFIX)
end

Loading…
Cancel
Save