2020-03-28 20:02:55 +00:00
|
|
|
require "myhtml"
|
|
|
|
|
2020-03-31 20:03:54 +00:00
|
|
|
# https://github.com/kostya/myhtml/issues/19
|
|
|
|
struct Myhtml::Node
  # Builds a string by serializing each child of this node with `to_html`,
  # in order. The node's own tag is not written, only its children.
  def inner_html
    String.build do |io|
      children.each { |child| child.to_html(io) }
    end
  end
end
|
|
|
|
|
2020-03-28 20:02:55 +00:00
|
|
|
module Muse::Dl
|
|
|
|
class InfoParser
|
2020-03-28 20:14:43 +00:00
|
|
|
# Collects the label/value pairs found in the document's ".details_row"
# elements.
#
# Within each row, the text of the first ".cell" is the label and the text
# of the second ".cell" is the value; both are whitespace-stripped. The row
# labelled "MARC Record" is left out. Rows with fewer than two cells are
# skipped rather than raising IndexError.
#
# Returns a Hash(String, String) keyed by label.
def self.infobox(myhtml : Myhtml::Parser)
  info = Hash(String, String).new
  myhtml.css(".details_row").each do |row|
    # Query the cells once per row instead of once per column.
    cells = row.css(".cell").map(&.inner_text).to_a
    next if cells.size < 2

    label = cells[0].strip
    next if label == "MARC Record"

    info[label] = cells[1].strip
  end
  info
end
|
2020-03-28 20:14:43 +00:00
|
|
|
|
2020-03-31 20:03:54 +00:00
|
|
|
# Looks up the "#search_within_book_id" element and returns its "value"
# attribute.
#
# Returns nil when no such element exists (or when `attribute_by` itself
# returns nil).
def self.id(myhtml : Myhtml::Parser)
  # `first?` yields nil for an empty result; plain `first` raises instead,
  # which would make a nil check on its result unreachable.
  myhtml.css("#search_within_book_id").first?.try &.attribute_by("value")
end
|
|
|
|
|
2020-03-28 20:40:41 +00:00
|
|
|
# Returns the stripped text of the first "#book_about_info .title" match.
# Raises if nothing matches (index 0 of an empty array).
def self.title(myhtml : Myhtml::Parser)
  titles = myhtml.css("#book_about_info .title").map(&.inner_text).to_a
  titles[0].strip
end
|
|
|
|
|
|
|
|
# Returns the text of the first "#book_about_info .author" match, stripped,
# with every literal "<BR>" turned into ", " and every newline into a space.
# Raises if nothing matches (index 0 of an empty array).
def self.author(myhtml : Myhtml::Parser)
  authors = myhtml.css("#book_about_info .author").map(&.inner_text).to_a
  text = authors[0].strip
  text = text.gsub("<BR>", ", ")
  text.gsub("\n", " ")
end
|
|
|
|
|
|
|
|
# Returns the stripped text of the first "#book_about_info .date" match,
# or nil if any exception is raised while extracting it (for example when
# nothing matches).
def self.date(myhtml : Myhtml::Parser)
  dates = myhtml.css("#book_about_info .date").map(&.inner_text).to_a
  dates[0].strip
rescue
  nil
end
|
|
|
|
|
|
|
|
# Returns the stripped text of the first "#book_about_info .pub a" match.
# Raises if nothing matches (index 0 of an empty array).
def self.publisher(myhtml : Myhtml::Parser)
  publishers = myhtml.css("#book_about_info .pub a").map(&.inner_text).to_a
  publishers[0].strip
end
|
|
|
|
|
|
|
|
# Returns the stripped text of the first "#book_about_info .card_summary"
# match. If any exception is raised while extracting it, a notice is written
# to STDERR and the placeholder "NA" is returned instead.
def self.summary(myhtml : Myhtml::Parser)
  summaries = myhtml.css("#book_about_info .card_summary").map(&.inner_text).to_a
  summaries[0].strip
rescue
  STDERR.puts "Could not fetch summary"
  "NA"
end
|
|
|
|
|
|
|
|
# Returns the inner HTML of the first "#book_about_info .card_summary" match,
# or the placeholder "NA" if taking the first match or serializing it raises.
def self.summary_html(myhtml : Myhtml::Parser)
  # The selector call stays outside the rescued region, so an exception
  # raised by `css` itself still propagates to the caller.
  matches = myhtml.css("#book_about_info .card_summary")
  html = begin
    matches.first.inner_html
  rescue
    "NA"
  end
  html
end
|
|
|
|
|
|
|
|
# Scans every "img.icon" element and returns a Set(Symbol) holding :html
# and/or :pdf, depending on whether any icon's "src" attribute contains
# "html" or "pdf" (case-insensitive). Icons without a "src" are ignored.
def self.formats(myhtml : Myhtml::Parser)
  found = Set(Symbol).new
  myhtml.css("img.icon").each do |icon|
    if src = icon.attribute_by("src")
      found << :html if /html/i.match(src)
      found << :pdf if /pdf/i.match(src)
    end
  end
  found
end
|
2020-03-28 20:02:55 +00:00
|
|
|
end
|
|
|
|
end
|