Adds parser and CSV generator
This commit is contained in:
parent
99924285c5
commit
39f4adfc55
|
@ -1,2 +1,3 @@
|
||||||
vendor/
|
vendor/
|
||||||
html/
|
html/
|
||||||
|
*.csv
|
|
@ -11,4 +11,4 @@ Zomato does not support HTTP/1.1, so wget can't be used.
|
||||||
|
|
||||||
- Keep track of historical data
|
- Keep track of historical data
|
||||||
- Does not use the API (since the rate-limit is too low at 1k/day)
|
- Does not use the API (since the rate-limit is too low at 1k/day)
|
||||||
+ We need to checkou around 8k restaurant status
|
+ We need to checkout around 8k restaurant status
|
|
@ -18,3 +18,5 @@ while read -r LINK; do
|
||||||
FILENAME="$(basename $LINK).html"
|
FILENAME="$(basename $LINK).html"
|
||||||
dl "$LINK" "$FILENAME"
|
dl "$LINK" "$FILENAME"
|
||||||
done <<< $(bundle exec ruby parse_dir.rb)
|
done <<< $(bundle exec ruby parse_dir.rb)
|
||||||
|
|
||||||
|
bundle exec ruby parse_restaurant.rb
|
|
@ -0,0 +1,26 @@
|
||||||
|
require 'nokogiri'
require "csv"

# Converts the saved restaurant listing pages under html/ into database.csv.
#
# Every file matching html/restaurants-*.html is parsed, and each element
# matching the `.plr10` selector becomes one CSV row with the columns
# url, title, location, address and cuisine.

# Returns the text content of +node+, or nil when the node is absent.
# A nil becomes an empty CSV field, so a listing block that lacks one of the
# optional elements no longer aborts the whole export.
def node_text(node)
  node ? node.text : nil
end

CSV.open("database.csv", "wb") do |csv|
  csv << ["url", "title", "location", "address", "cuisine"]

  # Sort the paths so the row order is identical on every run; the order
  # Dir.glob yields on its own is platform-dependent on older Rubies.
  Dir.glob("html/restaurants-*.html").sort.each do |file|
    # The block form closes the file handle as soon as parsing finishes,
    # instead of leaking one descriptor per page.
    page = File.open(file) { |html| Nokogiri::HTML(html) }

    page.css('.plr10').each do |div|
      links = div.css('a')
      spans = div.css('span')

      # The first link supplies both the URL and the title; without it there
      # is nothing to identify the row, so the block is skipped.
      title_link = links[0]
      next if title_link.nil?

      csv << [
        title_link['href'],   # url
        title_link.text,      # title
        node_text(links[1]),  # location: second link in the block
        node_text(spans[1]),  # address: second span in the block
        node_text(spans[0])   # cuisine: first span in the block
      ]
    end
  end
end
|
Loading…
Reference in New Issue