#!/bin/bash
#
# Scrape Zomato restaurant pages for one city: fetch the city directory
# page, every listing page it links to, then every restaurant page
# (~15k, fetched 30-at-a-time via GNU `sem`), and finally parse results.
#
# Requires: curl, GNU parallel (`sem`), and bundler with parse_dir.rb,
# parse_listing.rb and parse_restaurant.rb in the working directory.

set -euo pipefail

readonly ZOMATO_ROOT_URL="https://www.zomato.com"
readonly ZOMATO_CITY="bangalore"
readonly DIRECTORY_URL="$ZOMATO_ROOT_URL/$ZOMATO_CITY/directory"
# Exported: `sem` re-runs dl_z in a child bash, which otherwise would
# see an empty $USER_AGENT.
export USER_AGENT="Mozilla/Gecko/Firefox/58.0"

mkdir -p html/restaurants

# dl_z URL FILENAME
# Download URL into html/FILENAME unless it already exists (cheap
# resume support across reruns). On failure, remove the partial file so
# the existence check doesn't mistake it for a finished download.
dl_z() {
  echo "[+] $2"
  if [[ ! -f "html/$2" ]]; then
    # -f: fail on HTTP error status so a 404/5xx body is not cached.
    curl -fsS --http2-prior-knowledge --compressed \
      -H "User-Agent: $USER_AGENT" -- "$1" > "html/$2" \
      || { rm -f -- "html/$2"; return 1; }
  fi
}
export -f dl_z  # needed so `sem` can invoke dl_z in its child shell

dl_z "$DIRECTORY_URL" "directory.html"

# Download all the listing pages.
# Process substitution (not an unquoted here-string) so each URL is
# read on its own line instead of being word-split onto a single line.
while IFS= read -r link; do
  dl_z "$link" "$(basename -- "$link").html"
done < <(bundle exec ruby parse_dir.rb)

# Download all the restaurant pages (~15k), 30 concurrent jobs.
while IFS= read -r link; do
  filename="$(basename -- "$link").html"
  printf '%s\n' "$filename"
  sem -j 30 --timeout 300% dl_z "$link" "restaurants/$filename"
done < <(bundle exec ruby parse_listing.rb)

# Wait for every queued download BEFORE parsing; the original ran the
# parser first and saw an incomplete set of restaurant pages.
sem --wait
bundle exec ruby parse_restaurant.rb