bengaluru-food-census/bootstrap.sh

34 lines
861 B
Bash
Executable File

#!/bin/bash
# Scrape configuration: where the city's restaurant directory lives and how
# this script identifies itself to the server.
ZOMATO_ROOT_URL="https://www.zomato.com"
ZOMATO_CITY=bangalore
# Entry page of the crawl: <root>/<city>/directory.
DIRECTORY_URL="$ZOMATO_ROOT_URL/$ZOMATO_CITY/directory"
# Exported on purpose: dl_z reads USER_AGENT, and the restaurant downloads run
# dl_z through `sem`, i.e. in a separate shell process that only sees exported
# variables. Without the export those requests carry an empty User-Agent.
export USER_AGENT="Mozilla/Gecko/Firefox/58.0"
# All downloaded pages are stored under html/, restaurant pages under
# html/restaurants/.
mkdir -p html/restaurants
#######################################
# Download a URL into html/, skipping files that are already present.
# Globals:   USER_AGENT (read) - value sent in the User-Agent request header
# Arguments: $1 - URL to fetch
#            $2 - destination path, relative to html/
# Outputs:   prints "[+] $2" to stdout; curl reports errors on stderr (-sS)
# Returns:   0 if the file already existed or was downloaded successfully,
#            otherwise the failing exit status (normally curl's)
#######################################
dl_z() {
  local url=$1
  local dest="html/$2"
  local partial="$dest.part"
  local status=0

  printf '%s\n' "[+] $2"

  # Already fetched on an earlier run: nothing to do, and not an error.
  [ -f "$dest" ] && return 0

  # Write to a side file first so that a failed or interrupted transfer never
  # leaves a truncated "$dest" behind that later runs would treat as complete.
  curl -sS --http2-prior-knowledge --compressed \
    -H "User-Agent: $USER_AGENT" "$url" > "$partial" || status=$?

  if [ "$status" -ne 0 ]; then
    rm -f -- "$partial"
    return "$status"
  fi

  mv -- "$partial" "$dest"
}
# Export the function so that child shells (the `sem` jobs further down) can
# call dl_z by name.
export -f dl_z
# The directory page is the crawl's starting point.
dl_z "$DIRECTORY_URL" "directory.html"
# Download all the listing pages
# parse_dir.rb is read line by line, one URL per line (presumably extracted
# from html/directory.html -- confirm against the Ruby script).
# Process substitution is used instead of `<<< $(...)`: a here-string always
# supplies at least one (possibly empty) line, and bash releases before 4.4
# word-split an unquoted here-string, collapsing every link onto one line.
while read -r LINK; do
  # Ignore blank lines rather than requesting an empty URL.
  [ -n "$LINK" ] || continue
  FILENAME="$(basename "$LINK").html"
  dl_z "$LINK" "$FILENAME"
done < <(bundle exec ruby parse_dir.rb)
# Download all the restaurant pages (~15k)
# parse_listing.rb is read line by line, one URL per line. Process
# substitution replaces `<<< $(...)`, which always supplies at least one
# (possibly empty) line and, on bash older than 4.4, is word-split so that all
# links end up on a single line.
while read -r LINK; do
  # Ignore blank lines rather than queueing a download for an empty URL.
  [ -n "$LINK" ] || continue
  FILENAME="$(basename "$LINK").html"
  echo "$FILENAME"
  # Queue the download through GNU parallel's semaphore: at most 30 jobs at a
  # time; --timeout with a percentage is relative to the typical job runtime
  # (see the GNU parallel documentation for the exact rule).
  sem -j 30 --timeout 300% dl_z "$LINK" "restaurants/$FILENAME"
done < <(bundle exec ruby parse_listing.rb)
# Block until every queued download has finished BEFORE parsing; running the
# parser first would start it while downloads are still in flight.
sem --wait
bundle exec ruby parse_restaurant.rb