From 92b3175ce0f19ead7dacee43a46f3ea39f0fdfc5 Mon Sep 17 00:00:00 2001
From: Nemo
Date: Sat, 6 Jan 2018 19:37:40 +0530
Subject: [PATCH] Minor changes

---
Review notes (everything between the three-dash line and the first
"diff --git" header is ignored when the patch is applied):

 * bootstrap.sh now invokes parse_restaurants.rb, the script this patch
   creates. The previous target, parse_restaurant.rb, is renamed to
   parse_listing.rb by this same patch and would not exist afterwards.
 * The parse step is issued before `sem --wait`; presumably it should run
   only once the queued restaurant downloads have finished. TODO confirm
   and move it after the wait in a follow-up change.
 * Leading indentation of the bootstrap.sh context lines was reconstructed
   (four spaces assumed). If the context does not match, apply with
   `git apply --ignore-whitespace`.
 * The post-image blob id in the bootstrap.sh "index" line predates the
   one-word filename fix above and is informational only.

 bootstrap.sh                            | 6 +++++-
 parse_restaurant.rb => parse_listing.rb | 0
 parse_restaurants.rb                    | 4 ++++
 3 files changed, 9 insertions(+), 1 deletion(-)
 rename parse_restaurant.rb => parse_listing.rb (100%)
 create mode 100644 parse_restaurants.rb

diff --git a/bootstrap.sh b/bootstrap.sh
index 02e5a24..27905e0 100755
--- a/bootstrap.sh
+++ b/bootstrap.sh
@@ -16,14 +16,18 @@ export -f dl_z
 
 dl "$DIRECTORY_URL" "directory.html"
 
+# Download all the listing pages
 while read -r LINK; do
     FILENAME="$(basename $LINK).html"
     dl "$LINK" "$FILENAME"
 done <<< $(bundle exec ruby parse_dir.rb)
 
+# Download all the restaurant pages (~15k)
 while read -r LINK; do
     FILENAME="$(basename $LINK).html"
     sem -j 30 --timeout 300% dl_z "$LINK" "restaurants/$FILENAME"
-done <<< $(bundle exec ruby parse_restaurant.rb)
+done <<< $(bundle exec ruby parse_listing.rb)
+
+bundle exec ruby parse_restaurants.rb
 
 sem --wait
\ No newline at end of file
diff --git a/parse_restaurant.rb b/parse_listing.rb
similarity index 100%
rename from parse_restaurant.rb
rename to parse_listing.rb
diff --git a/parse_restaurants.rb b/parse_restaurants.rb
new file mode 100644
index 0000000..f1ae60b
--- /dev/null
+++ b/parse_restaurants.rb
@@ -0,0 +1,4 @@
+require 'nokogiri'
+require "csv"
+
+restaurants = []
\ No newline at end of file