Initial commit
This commit is contained in:
commit
99924285c5
|
@ -0,0 +1,2 @@
|
||||||
|
vendor/
|
||||||
|
html/
|
|
@ -0,0 +1,4 @@
|
||||||
|
source 'https://rubygems.org'
|
||||||
|
|
||||||
|
gem 'net-http2'
|
||||||
|
gem 'nokogiri'
|
|
@ -0,0 +1,19 @@
|
||||||
|
GEM
|
||||||
|
remote: https://rubygems.org/
|
||||||
|
specs:
|
||||||
|
http-2 (0.8.4)
|
||||||
|
mini_portile2 (2.3.0)
|
||||||
|
net-http2 (0.16.0)
|
||||||
|
http-2 (= 0.8.4)
|
||||||
|
nokogiri (1.8.1)
|
||||||
|
mini_portile2 (~> 2.3.0)
|
||||||
|
|
||||||
|
PLATFORMS
|
||||||
|
ruby
|
||||||
|
|
||||||
|
DEPENDENCIES
|
||||||
|
net-http2
|
||||||
|
nokogiri
|
||||||
|
|
||||||
|
BUNDLED WITH
|
||||||
|
1.16.1
|
|
@ -0,0 +1,14 @@
|
||||||
|
# bengaluru-food-census
|
||||||
|
|
||||||
|
Keep track of restaurant openings and closures in the city.
|
||||||
|
|
||||||
|
# Quirk
|
||||||
|
|
||||||
|
Zomato does not support HTTP/1.1, so wget can't be used.
|
||||||
|
|
||||||
|
|
||||||
|
# Features
|
||||||
|
|
||||||
|
- Keep track of historical data
|
||||||
|
- Does not use the API (since the rate-limit is too low at 1k/day)
|
||||||
|
+ We need to check the status of around 8k restaurants
|
|
@ -0,0 +1,20 @@
|
||||||
|
#!/bin/bash
#
# Download the Zomato directory page for one city into html/, then download
# every link that parse_dir.rb prints (one URL per line) into html/ as well.

ZOMATO_ROOT_URL=https://www.zomato.com
ZOMATO_CITY=bangalore
DIRECTORY_URL="$ZOMATO_ROOT_URL/$ZOMATO_CITY/directory"
USER_AGENT="Mozilla/Gecko/Firefox/58.0"

mkdir -p html

# Fetch a URL with curl and write the response body to html/<file name>.
#   $1 - URL to download
#   $2 - file name to create inside html/ (also echoed as a progress line)
function dl() {
  echo "[+] $2"
  # "$1" is quoted so a URL containing spaces or glob characters is passed
  # to curl as a single, literal argument.
  curl -sS --http2-prior-knowledge --compressed -H "User-Agent: $USER_AGENT" "$1" > "html/$2"
}

dl "$DIRECTORY_URL" "directory.html"

# Read the links through process substitution instead of an unquoted
# here-string: the loop then sees exactly one URL per line on every bash
# version, and does not run at all when the parser prints nothing.
while read -r LINK; do
  FILENAME="$(basename "$LINK").html"
  dl "$LINK" "$FILENAME"
done < <(bundle exec ruby parse_dir.rb)
|
|
@ -0,0 +1,11 @@
|
||||||
|
require 'nokogiri'

# Print, one per line, the href of every <a> element in html/directory.html
# whose href starts with the restaurant-directory URL prefix below.

prefix = 'https://www.zomato.com/bangalore/directory/restaurants-'

# Block form of File.open closes the file handle once parsing is done.
page = File.open('html/directory.html') { |file| Nokogiri::HTML(file) }

page.css('a').each do |link|
  href = link['href']
  # Anchors without an href yield nil and are skipped.
  puts href if href && href.start_with?(prefix)
end
|
Loading…
Reference in New Issue