Minor style changes
This commit is contained in:
parent
92b3175ce0
commit
ecc340e641
11
bootstrap.sh
11
bootstrap.sh
|
@@ -14,20 +14,21 @@ function dl_z() {
|
||||||
|
|
||||||
export -f dl_z
|
export -f dl_z
|
||||||
|
|
||||||
dl "$DIRECTORY_URL" "directory.html"
|
dl_z "$DIRECTORY_URL" "directory.html"
|
||||||
|
|
||||||
# Download all the listing pages
|
# Download all the listing pages
|
||||||
while read -r LINK; do
|
while read -r LINK; do
|
||||||
FILENAME="$(basename $LINK).html"
|
FILENAME="$(basename $LINK).html"
|
||||||
dl "$LINK" "$FILENAME"
|
dl_z "$LINK" "$FILENAME"
|
||||||
done <<< $(bundle exec ruby parse_dir.rb)
|
done <<< $(bundle exec ruby parse_dir.rb)
|
||||||
|
|
||||||
# Download all the restaurant pages (~15k)
|
# Download all the restaurant pages (~15k)
|
||||||
while read -r LINK; do
|
while read -r LINK; do
|
||||||
FILENAME="$(basename $LINK).html"
|
FILENAME="$(basename $LINK).html"
|
||||||
sem -j 30 --timeout 300% dl_z "$LINK" "restaurants/$FILENAME"
|
echo $FILENAME
|
||||||
|
# sem -j 30 --timeout 300% dl_z "$LINK" "restaurants/$FILENAME"
|
||||||
done <<< $(bundle exec ruby parse_listing.rb)
|
done <<< $(bundle exec ruby parse_listing.rb)
|
||||||
|
|
||||||
bundle exec ruby parse_restaurant.rb
|
# bundle exec ruby parse_restaurant.rb
|
||||||
|
|
||||||
sem --wait
|
# sem --wait
|
|
@@ -1,11 +1,11 @@
|
||||||
require 'nokogiri'
|
require 'nokogiri'
|
||||||
require "csv"
|
require 'csv'
|
||||||
|
|
||||||
restaurants = []
|
restaurants = []
|
||||||
|
|
||||||
CSV.open("database.csv", "wb") do |csv|
|
CSV.open('database.csv', 'wb') do |csv|
|
||||||
csv << ["url", "title", "location", "address", "cuisine"]
|
csv << ['url', 'title', 'location', 'address', 'cuisine']
|
||||||
Dir.glob("html/restaurants-*.html") do |file|
|
Dir.glob('html/restaurants-*.html') do |file|
|
||||||
page = Nokogiri::HTML(open(file))
|
page = Nokogiri::HTML(open(file))
|
||||||
|
|
||||||
page.css('.plr10').each do |div|
|
page.css('.plr10').each do |div|
|
||||||
|
|
Loading…
Reference in New Issue