mirror of https://github.com/captn3m0/muse-dl.git
Adds support for .formats and fixes tests
This commit is contained in:
parent
98da71989c
commit
2a78be1022
|
@ -17,6 +17,8 @@ describe Muse::Dl::Book do
|
||||||
book.author.should eq "Albert H. Tillson, Jr."
|
book.author.should eq "Albert H. Tillson, Jr."
|
||||||
book.date.should eq "2010"
|
book.date.should eq "2010"
|
||||||
book.publisher.should eq "University of Virginia Press"
|
book.publisher.should eq "University of Virginia Press"
|
||||||
|
book.formats.should contain :pdf
|
||||||
|
book.formats.should_not contain :html
|
||||||
end
|
end
|
||||||
|
|
||||||
it "should parse the summary" do
|
it "should parse the summary" do
|
||||||
|
@ -50,7 +52,7 @@ describe Muse::Dl::Book do
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
it "it should parse the DOI for 68534" do
|
it "should parse book/68534" do
|
||||||
html = File.new("spec/fixtures/book-68534.html").gets_to_end
|
html = File.new("spec/fixtures/book-68534.html").gets_to_end
|
||||||
book = Muse::Dl::Book.new html
|
book = Muse::Dl::Book.new html
|
||||||
book.info["ISBN"].should eq "9781501737695"
|
book.info["ISBN"].should eq "9781501737695"
|
||||||
|
@ -60,5 +62,14 @@ describe Muse::Dl::Book do
|
||||||
book.info["Language"].should eq "English"
|
book.info["Language"].should eq "English"
|
||||||
book.info["Open Access"].should eq "Yes"
|
book.info["Open Access"].should eq "Yes"
|
||||||
book.info["DOI"].should eq "10.1353/book.68534"
|
book.info["DOI"].should eq "10.1353/book.68534"
|
||||||
|
book.formats.should contain :html
|
||||||
|
book.formats.should_not contain :pdf
|
||||||
|
end
|
||||||
|
|
||||||
|
it "should note both formats for book/60322" do
|
||||||
|
html = File.new("spec/fixtures/book-60322.html").gets_to_end
|
||||||
|
book = Muse::Dl::Book.new html
|
||||||
|
book.formats.should contain :pdf
|
||||||
|
book.formats.should contain :html
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -25,7 +25,7 @@ module Muse::Dl
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
|
|
||||||
url = "https://muse.jhu.edu/chapter/#{chapter_id}"
|
url = "https://muse.jhu.edu/chapter/#{chapter_id}/pdf"
|
||||||
headers = HEADERS.merge({
|
headers = HEADERS.merge({
|
||||||
"Referer" => "https://muse.jhu.edu/verify?url=%2Fchapter%2F#{chapter_id}%2Fpdf",
|
"Referer" => "https://muse.jhu.edu/verify?url=%2Fchapter%2F#{chapter_id}%2Fpdf",
|
||||||
})
|
})
|
||||||
|
|
|
@ -1,5 +1,12 @@
|
||||||
require "myhtml"
|
require "myhtml"
|
||||||
|
|
||||||
|
# https://github.com/kostya/myhtml/issues/19
|
||||||
|
struct Myhtml::Node
|
||||||
|
def inner_html
|
||||||
|
String.build { |buf| children.each &.to_html(buf) }
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
module Muse::Dl
|
module Muse::Dl
|
||||||
class InfoParser
|
class InfoParser
|
||||||
def self.infobox(myhtml : Myhtml::Parser)
|
def self.infobox(myhtml : Myhtml::Parser)
|
||||||
|
@ -16,6 +23,13 @@ module Muse::Dl
|
||||||
return info
|
return info
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def self.id(myhtml : Myhtml::Parser)
|
||||||
|
searchid = myhtml.css("#search_within_book_id").first
|
||||||
|
if searchid
|
||||||
|
searchid.attribute_by("value")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def self.title(myhtml : Myhtml::Parser)
|
def self.title(myhtml : Myhtml::Parser)
|
||||||
myhtml.css("#book_about_info .title").map(&.inner_text).to_a[0].strip
|
myhtml.css("#book_about_info .title").map(&.inner_text).to_a[0].strip
|
||||||
end
|
end
|
||||||
|
@ -42,8 +56,24 @@ module Muse::Dl
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.summary_html(myhtml : Myhtml::Parser)
|
def self.summary_html(myhtml : Myhtml::Parser)
|
||||||
return "TODO"
|
summary_div = myhtml.css("#book_about_info .card_summary")
|
||||||
myhtml.css("#book_about_info .card_summary").map(&.tag_text).to_a[0].strip
|
begin
|
||||||
|
summary_div.first.inner_html
|
||||||
|
rescue e : Exception
|
||||||
|
"NA"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.formats(myhtml : Myhtml::Parser)
|
||||||
|
formats = Set(Symbol).new
|
||||||
|
myhtml.css("img.icon").each do |icon|
|
||||||
|
url = icon.attribute_by("src")
|
||||||
|
if url
|
||||||
|
formats.add :html if /html/i.match url
|
||||||
|
formats.add :pdf if /pdf/i.match url
|
||||||
|
end
|
||||||
|
end
|
||||||
|
formats
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
10
src/thing.cr
10
src/thing.cr
|
@ -13,22 +13,26 @@ module Muse::Dl
|
||||||
@cover_url : String
|
@cover_url : String
|
||||||
@thumbnail_url : String
|
@thumbnail_url : String
|
||||||
@h : Myhtml::Parser
|
@h : Myhtml::Parser
|
||||||
|
@formats : Set(Symbol)
|
||||||
|
|
||||||
getter :info, :title, :author, :date, :publisher, :summary, :summary_html, :cover_url, :thumbnail_url
|
getter :info, :title, :author, :date, :publisher, :summary, :summary_html, :cover_url, :thumbnail_url, :formats
|
||||||
|
|
||||||
private getter :h
|
private getter :h
|
||||||
|
|
||||||
def initialize(html : String)
|
def initialize(html : String)
|
||||||
@h = Myhtml::Parser.new html
|
@h = Myhtml::Parser.new html
|
||||||
@info = InfoParser.infobox(h)
|
@info = InfoParser.infobox(h)
|
||||||
|
id : String | Nil = InfoParser.id(h)
|
||||||
@title = InfoParser.title(h)
|
@title = InfoParser.title(h)
|
||||||
@author = InfoParser.author(h)
|
@author = InfoParser.author(h)
|
||||||
@date = InfoParser.date(h)
|
@date = InfoParser.date(h)
|
||||||
@publisher = InfoParser.publisher(h)
|
@publisher = InfoParser.publisher(h)
|
||||||
@summary = InfoParser.summary(h)
|
@summary = InfoParser.summary(h)
|
||||||
@summary_html = InfoParser.summary_html(h)
|
@summary_html = InfoParser.summary_html(h)
|
||||||
@cover_url = "TODO"
|
@formats = InfoParser.formats(h)
|
||||||
@thumbnail_url = "TODO"
|
# TODO: Make this work for journals as well
|
||||||
|
@cover_url = "https://muse.jhu.edu/book/#{id}/image/front_cover.jpg"
|
||||||
|
@thumbnail_url = "https://muse.jhu.edu/book/#{id}/image/front_cover.jpg?format=180"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue