Minor style changes

This commit is contained in:
Nemo 2018-01-15 02:01:40 +05:30
parent 92b3175ce0
commit ecc340e641
2 changed files with 10 additions and 9 deletions

View File

@@ -14,20 +14,21 @@ function dl_z() {
export -f dl_z export -f dl_z
dl "$DIRECTORY_URL" "directory.html" dl_z "$DIRECTORY_URL" "directory.html"
# Download all the listing pages # Download all the listing pages
while read -r LINK; do while read -r LINK; do
FILENAME="$(basename $LINK).html" FILENAME="$(basename $LINK).html"
dl "$LINK" "$FILENAME" dl_z "$LINK" "$FILENAME"
done <<< $(bundle exec ruby parse_dir.rb) done <<< $(bundle exec ruby parse_dir.rb)
# Download all the restaurant pages (~15k) # Download all the restaurant pages (~15k)
while read -r LINK; do while read -r LINK; do
FILENAME="$(basename $LINK).html" FILENAME="$(basename $LINK).html"
sem -j 30 --timeout 300% dl_z "$LINK" "restaurants/$FILENAME" echo $FILENAME
# sem -j 30 --timeout 300% dl_z "$LINK" "restaurants/$FILENAME"
done <<< $(bundle exec ruby parse_listing.rb) done <<< $(bundle exec ruby parse_listing.rb)
bundle exec ruby parse_restaurant.rb # bundle exec ruby parse_restaurant.rb
sem --wait # sem --wait

View File

@@ -1,11 +1,11 @@
require 'nokogiri' require 'nokogiri'
require "csv" require 'csv'
restaurants = [] restaurants = []
CSV.open("database.csv", "wb") do |csv| CSV.open('database.csv', 'wb') do |csv|
csv << ["url", "title", "location", "address", "cuisine"] csv << ['url', 'title', 'location', 'address', 'cuisine']
Dir.glob("html/restaurants-*.html") do |file| Dir.glob('html/restaurants-*.html') do |file|
page = Nokogiri::HTML(open(file)) page = Nokogiri::HTML(open(file))
page.css('.plr10').each do |div| page.css('.plr10').each do |div|