You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
34 lines
861 B
Bash
34 lines
861 B
Bash
#!/bin/bash
#
# Configuration and output directories for downloading Zomato pages.

# Base URL that every request URL is built from.
ZOMATO_ROOT_URL="https://www.zomato.com"

# City path segment used in the directory URL. Defaults to bangalore; set
# ZOMATO_CITY in the environment to target a different city.
ZOMATO_CITY="${ZOMATO_CITY:-bangalore}"

DIRECTORY_URL="$ZOMATO_ROOT_URL/$ZOMATO_CITY/directory"

# Value sent in the User-Agent header of every download.
USER_AGENT="Mozilla/Gecko/Firefox/58.0"

# Downloads are written under html/; restaurant pages go to html/restaurants/.
mkdir -p html/restaurants

#######################################
# Download a URL into html/<name>, skipping files that already exist.
# Globals:   USER_AGENT (read) - sent as the User-Agent request header
# Arguments: $1 - URL to fetch
#            $2 - destination path relative to html/
# Outputs:   prints "[+] <name>" to stdout for every call
# Returns:   0 if the file already existed or was downloaded,
#            otherwise the exit status of curl
#######################################
dl_z() {
  local url=$1
  local dest="html/$2"
  local partial="${dest}.part"
  local status

  echo "[+] $2"

  # Already downloaded on a previous run: nothing to do.
  if [ -f "$dest" ]; then
    return 0
  fi

  # Write to a side file first so an interrupted or failed transfer never
  # leaves a truncated file that later runs would mistake for a cached page.
  if curl -sS --http2-prior-knowledge --compressed \
    -H "User-Agent: $USER_AGENT" "$url" > "$partial"; then
    mv -f -- "$partial" "$dest"
  else
    status=$?
    rm -f -- "$partial"
    return "$status"
  fi
}

# sem starts each job in a separate shell, which only sees exported names.
# Export the function and the variable it reads, so those jobs send the
# configured User-Agent as well.
export -f dl_z
export USER_AGENT

# Fetch the city directory page.
dl_z "$DIRECTORY_URL" "directory.html"

# Download all the listing pages.
# parse_dir.rb prints one link per line. Process substitution keeps those
# lines separate on every bash version, and yields no iterations when the
# parser prints nothing.
while read -r LINK; do
  # Skip blank lines so we never request an empty URL or write html/.html.
  [ -n "$LINK" ] || continue
  FILENAME="$(basename -- "$LINK").html"
  dl_z "$LINK" "$FILENAME"
done < <(bundle exec ruby parse_dir.rb)

# Download all the restaurant pages (~15k).
# parse_listing.rb prints one link per line; each link is fetched through
# sem, which runs at most 30 dl_z jobs at a time.
while read -r LINK; do
  # Skip blank lines so we never queue a download for an empty URL.
  [ -n "$LINK" ] || continue
  FILENAME="$(basename -- "$LINK").html"
  printf '%s\n' "$FILENAME"
  sem -j 30 --timeout 300% dl_z "$LINK" "restaurants/$FILENAME"
done < <(bundle exec ruby parse_listing.rb)

# Block until every queued download has finished before parsing.
# NOTE(review): assumes parse_restaurant.rb reads the files under
# html/restaurants/ - confirm against that script.
sem --wait

bundle exec ruby parse_restaurant.rb