Adds parser and CSV generator
This commit is contained in:
parent
99924285c5
commit
39f4adfc55
|
@ -1,2 +1,3 @@
|
|||
vendor/
|
||||
html/
|
||||
html/
|
||||
*.csv
|
|
@ -11,4 +11,4 @@ Zomato does not support HTTP/1.1, so wget can't be used.
|
|||
|
||||
- Keep track of historical data
|
||||
- Does not use the API (since the rate-limit is too low at 1k/day)
|
||||
+ We need to checkou around 8k restaurant status
|
||||
+ We need to check the status of around 8k restaurants
|
|
@ -18,3 +18,5 @@ while read -r LINK; do
|
|||
FILENAME="$(basename $LINK).html"
|
||||
dl "$LINK" "$FILENAME"
|
||||
done <<< $(bundle exec ruby parse_dir.rb)
|
||||
|
||||
bundle exec ruby parse_restaurant.rb
|
|
@ -0,0 +1,26 @@
|
|||
require 'nokogiri'
|
||||
require "csv"
|
||||
|
||||
# Builds database.csv from the restaurant listing pages saved under html/.
#
# Every element matching `.plr10` in each html/restaurants-*.html file is
# treated as one restaurant card and contributes one CSV row.

# Converts one `.plr10` card into a CSV row.
#
# The first anchor supplies the title and URL, the second anchor the
# location, the first span the cuisine and the second span the address.
#
# @param card [#css] node answering +css(selector)+ with an indexable list of
#   nodes that respond to +text+ and +[]+
# @return [Array, nil] [url, title, location, address, cuisine]; fields whose
#   node is absent are nil, and the whole row is nil when the card has no
#   anchor at all
def restaurant_row(card)
  links = card.css('a')
  spans = card.css('span')

  title_link = links[0]
  # A card without any anchor carries no title or URL, so there is nothing
  # worth recording for it.
  return unless title_link

  [
    title_link['href'],
    title_link.text,
    links[1]&.text,
    spans[1]&.text,
    spans[0]&.text
  ]
end

CSV.open("database.csv", "wb") do |csv|
  csv << ["url", "title", "location", "address", "cuisine"]

  # Sort explicitly so the row order is reproducible between runs.
  Dir.glob("html/restaurants-*.html").sort.each do |file|
    # Block form closes the file handle as soon as the page is parsed.
    page = File.open(file) { |io| Nokogiri::HTML(io) }

    page.css('.plr10').each do |card|
      row = restaurant_row(card)
      csv << row if row
    end
  end
end
|
Loading…
Reference in New Issue