From ecc340e64178c76d545c24c87f610d00b622c7ab Mon Sep 17 00:00:00 2001 From: Nemo Date: Mon, 15 Jan 2018 02:01:40 +0530 Subject: [PATCH] Minor style changes --- bootstrap.sh | 11 ++++++----- parse_listing.rb | 8 ++++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/bootstrap.sh b/bootstrap.sh index 27905e0..8eafb2e 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -14,20 +14,21 @@ function dl_z() { export -f dl_z -dl "$DIRECTORY_URL" "directory.html" +dl_z "$DIRECTORY_URL" "directory.html" # Download all the listing pages while read -r LINK; do FILENAME="$(basename $LINK).html" - dl "$LINK" "$FILENAME" + dl_z "$LINK" "$FILENAME" done <<< $(bundle exec ruby parse_dir.rb) # Download all the restaurant pages (~15k) while read -r LINK; do FILENAME="$(basename $LINK).html" - sem -j 30 --timeout 300% dl_z "$LINK" "restaurants/$FILENAME" + echo $FILENAME + # sem -j 30 --timeout 300% dl_z "$LINK" "restaurants/$FILENAME" done <<< $(bundle exec ruby parse_listing.rb) -bundle exec ruby parse_restaurant.rb +# bundle exec ruby parse_restaurant.rb -sem --wait \ No newline at end of file +# sem --wait \ No newline at end of file diff --git a/parse_listing.rb b/parse_listing.rb index 8382f64..1029a32 100644 --- a/parse_listing.rb +++ b/parse_listing.rb @@ -1,11 +1,11 @@ require 'nokogiri' -require "csv" +require 'csv' restaurants = [] -CSV.open("database.csv", "wb") do |csv| - csv << ["url", "title", "location", "address", "cuisine"] - Dir.glob("html/restaurants-*.html") do |file| +CSV.open('database.csv', 'wb') do |csv| + csv << ['url', 'title', 'location', 'address', 'cuisine'] + Dir.glob('html/restaurants-*.html') do |file| page = Nokogiri::HTML(open(file)) page.css('.plr10').each do |div|