Adds parser and CSV generator

This commit is contained in:
Nemo 2018-01-02 00:45:51 +05:30
parent 99924285c5
commit 39f4adfc55
4 changed files with 31 additions and 2 deletions

3
.gitignore vendored
View File

@ -1,2 +1,3 @@
vendor/
html/
html/
*.csv

View File

@ -11,4 +11,4 @@ Zomato does not support HTTP/1.1, so wget can't be used.
- Keep track of historical data
- Does not use the API (since the rate-limit is too low at 1k/day)
+ We need to checkou around 8k restaurant status
+ We need to checkout around 8k restaurant status

View File

@ -18,3 +18,5 @@ while read -r LINK; do
FILENAME="$(basename $LINK).html"
dl "$LINK" "$FILENAME"
done <<< $(bundle exec ruby parse_dir.rb)
bundle exec ruby parse_restaurant.rb

26
parse_restaurant.rb Normal file
View File

@ -0,0 +1,26 @@
require 'nokogiri'
require "csv"
# Builds database.csv from the restaurant listing pages saved under html/.
#
# Every file matching html/restaurants-*.html is parsed with Nokogiri and
# each element matching `.plr10` contributes one CSV row:
#   url      - href of the first link
#   title    - text of the first link
#   location - text of the second link
#   address  - text of the second span
#   cuisine  - text of the first span
CSV.open("database.csv", "wb") do |csv|
  csv << ["url", "title", "location", "address", "cuisine"]

  Dir.glob("html/restaurants-*.html") do |file|
    # File.open with a block closes the handle once the document is parsed;
    # a bare Kernel#open would leave one descriptor open per page.
    page = File.open(file) { |io| Nokogiri::HTML(io) }

    page.css('.plr10').each do |div|
      links = div.css('a')
      spans = div.css('span')

      # A row needs two links and two spans. Report and skip an incomplete
      # element instead of aborting the whole export with NoMethodError.
      if links.length < 2 || spans.length < 2
        warn "#{file}: skipping .plr10 element without two links and two spans"
        next
      end

      title_link = links[0]
      csv << [
        title_link['href'],
        title_link.text,
        links[1].text,
        spans[1].text,
        spans[0].text
      ]
    end
  end
end