mirror of https://github.com/captn3m0/muse-dl.git
Adds support for .formats and fixes tests
This commit is contained in:
parent
98da71989c
commit
2a78be1022
|
@ -17,6 +17,8 @@ describe Muse::Dl::Book do
|
|||
book.author.should eq "Albert H. Tillson, Jr."
|
||||
book.date.should eq "2010"
|
||||
book.publisher.should eq "University of Virginia Press"
|
||||
book.formats.should contain :pdf
|
||||
book.formats.should_not contain :html
|
||||
end
|
||||
|
||||
it "should parse the summary" do
|
||||
|
@ -50,7 +52,7 @@ describe Muse::Dl::Book do
|
|||
]
|
||||
end
|
||||
|
||||
it "it should parse the DOI for 68534" do
|
||||
it "should parse book/68534" do
|
||||
html = File.new("spec/fixtures/book-68534.html").gets_to_end
|
||||
book = Muse::Dl::Book.new html
|
||||
book.info["ISBN"].should eq "9781501737695"
|
||||
|
@ -60,5 +62,14 @@ describe Muse::Dl::Book do
|
|||
book.info["Language"].should eq "English"
|
||||
book.info["Open Access"].should eq "Yes"
|
||||
book.info["DOI"].should eq "10.1353/book.68534"
|
||||
book.formats.should contain :html
|
||||
book.formats.should_not contain :pdf
|
||||
end
|
||||
|
||||
it "should note both formats for book/60322" do
  # File.read returns the whole fixture and closes the handle;
  # File.new(...).gets_to_end would leave the file open.
  html = File.read("spec/fixtures/book-60322.html")
  book = Muse::Dl::Book.new html
  # This fixture is expected to advertise both download formats.
  book.formats.should contain :pdf
  book.formats.should contain :html
end
|
||||
end
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -25,7 +25,7 @@ module Muse::Dl
|
|||
return
|
||||
end
|
||||
|
||||
url = "https://muse.jhu.edu/chapter/#{chapter_id}"
|
||||
url = "https://muse.jhu.edu/chapter/#{chapter_id}/pdf"
|
||||
headers = HEADERS.merge({
|
||||
"Referer" => "https://muse.jhu.edu/verify?url=%2Fchapter%2F#{chapter_id}%2Fpdf",
|
||||
})
|
||||
|
|
|
@ -1,5 +1,12 @@
|
|||
require "myhtml"
|
||||
|
||||
# https://github.com/kostya/myhtml/issues/19
|
||||
struct Myhtml::Node
  # Renders every child of this node to HTML and returns the concatenated
  # result as a String (the node's own tag is not part of the output).
  def inner_html
    String.build do |io|
      children.each { |child| child.to_html(io) }
    end
  end
end
|
||||
|
||||
module Muse::Dl
|
||||
class InfoParser
|
||||
def self.infobox(myhtml : Myhtml::Parser)
|
||||
|
@ -16,6 +23,13 @@ module Muse::Dl
|
|||
return info
|
||||
end
|
||||
|
||||
# Returns the `value` attribute of the first `#search_within_book_id`
# element, or nil when the element (or the attribute) is missing.
def self.id(myhtml : Myhtml::Parser)
  # `first?` gives nil for an empty match; `first` would raise instead,
  # which made the previous nil-guard unreachable.
  myhtml.css("#search_within_book_id").first?.try &.attribute_by("value")
end
|
||||
|
||||
# Returns the whitespace-stripped text of the first
# `#book_about_info .title` element.
def self.title(myhtml : Myhtml::Parser)
  title_texts = myhtml.css("#book_about_info .title").map { |node| node.inner_text }.to_a
  first_title = title_texts[0]
  first_title.strip
end
|
||||
|
@ -42,8 +56,24 @@ module Muse::Dl
|
|||
end
|
||||
|
||||
# Returns the inner HTML of the first `#book_about_info .card_summary`
# element, or "NA" when the page has no such element.
def self.summary_html(myhtml : Myhtml::Parser)
  # Check for the missing element explicitly rather than rescuing every
  # Exception, so unrelated failures are not silently turned into "NA".
  summary_div = myhtml.css("#book_about_info .card_summary").first?
  summary_div ? summary_div.inner_html : "NA"
end
|
||||
|
||||
# Builds the set of formats hinted at by the page's `img.icon` elements:
# :html is added when an icon's `src` contains "html", :pdf when it contains
# "pdf" (both case-insensitive). Icons without a `src` are ignored.
def self.formats(myhtml : Myhtml::Parser)
  found = Set(Symbol).new
  myhtml.css("img.icon").each do |img|
    if src = img.attribute_by("src")
      found.add :html if src =~ /html/i
      found.add :pdf if src =~ /pdf/i
    end
  end
  found
end
|
||||
end
|
||||
end
|
||||
|
|
10
src/thing.cr
10
src/thing.cr
|
@ -13,22 +13,26 @@ module Muse::Dl
|
|||
@cover_url : String
|
||||
@thumbnail_url : String
|
||||
@h : Myhtml::Parser
|
||||
@formats : Set(Symbol)
|
||||
|
||||
getter :info, :title, :author, :date, :publisher, :summary, :summary_html, :cover_url, :thumbnail_url
|
||||
getter :info, :title, :author, :date, :publisher, :summary, :summary_html, :cover_url, :thumbnail_url, :formats
|
||||
|
||||
private getter :h
|
||||
|
||||
# Parses the given HTML document and fills in the metadata fields by
# delegating to InfoParser; cover and thumbnail URLs are derived from the
# id that InfoParser.id extracts from the page.
def initialize(html : String)
  @h = Myhtml::Parser.new html
  @info = InfoParser.infobox(h)
  @title = InfoParser.title(h)
  @author = InfoParser.author(h)
  @date = InfoParser.date(h)
  @publisher = InfoParser.publisher(h)
  @summary = InfoParser.summary(h)
  @summary_html = InfoParser.summary_html(h)
  @formats = InfoParser.formats(h)

  # NOTE(review): id is declared nilable; a nil id interpolates as an empty
  # string and yields ".../book//image/..." URLs — confirm this is acceptable.
  id : String | Nil = InfoParser.id(h)
  # TODO: Make this work for journals as well
  @cover_url = "https://muse.jhu.edu/book/#{id}/image/front_cover.jpg"
  @thumbnail_url = "https://muse.jhu.edu/book/#{id}/image/front_cover.jpg?format=180"
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue