mirror of https://github.com/captn3m0/muse-dl.git
Adds an infobox parser
This commit is contained in:
parent
e7754344d8
commit
df079ceaa0
|
@ -0,0 +1,96 @@
|
|||
require "./spec_helper"
|
||||
|
||||
# Spec for Muse::Dl::Book: feeds it a sample "Additional Information" table
# and checks that each label/value row ends up in `book.info`.
describe Muse::Dl::Book do
  it "should parse the infobox" do
    # Sample markup: one `.details_row` per field, each holding a
    # `.cell.label` (field name) followed by a `.cell` (field value).
    html = <<-EOT
    <div class="column full">
      <div class="title_wrap details">
        <h2>Additional Information</h2>
      </div>
      <div class="details_tbl">
        <div class="details_row">
          <div class="cell label">
            ISBN
          </div>
          <div class="cell">
            9780813928517
          </div>
        </div>
        <div class="details_row">
          <div class="cell label">
            Related ISBN
          </div>
          <div class="cell">
            9780813928456
          </div>
        </div>
        <div class="details_row">
          <div class="cell label">
            MARC Record
          </div>
          <div class="cell">
            <a href="https://about.muse.jhu.edu/lib/metadata?filename=muse_book_875&no_auth=1&format=marc&content_ids=book:875">Download</a>
          </div>
        </div>
        <div class="details_row">
          <div class="cell label">
            OCLC
          </div>
          <div class="cell">
            755633557
          </div>
        </div>
        <div class="details_row">
          <div class="cell label">
            Pages
          </div>
          <div class="cell">
            432
          </div>
        </div>
        <div class="details_row">
          <div class="cell label">
            Launched on MUSE
          </div>
          <div class="cell">
            2012-01-01
          </div>
        </div>
        <div class="details_row">
          <div class="cell label">
            DOI
          </div>
          <div class="cell">
            <a href="https://doi.org/10.1353/book.68534" target="_blank">10.1353/book.68534<img src="/images/link_blue.png" alt="external link" style="vertical-align:top;"></a>
          </div>
        </div>
        <div class="details_row">
          <div class="cell label">
            Language
          </div>
          <div class="cell">
            English
          </div>
        </div>
        <div class="details_row">
          <div class="cell label">
            Open Access
          </div>
          <div class="cell">
            No
          </div>
        </div>
      </div>
    </div>
    EOT

    book = Muse::Dl::Book.new html

    # Plain-text rows come back with surrounding whitespace stripped.
    book.info["ISBN"].should eq "9780813928517"
    book.info["Related ISBN"].should eq "9780813928456"
    book.info["OCLC"].should eq "755633557"
    book.info["Pages"].should eq "432"
    book.info["Launched on MUSE"].should eq "2012-01-01"
    book.info["Language"].should eq "English"
    book.info["Open Access"].should eq "No"
    # The DOI cell wraps its text in a link; only the text is kept.
    book.info["DOI"].should eq "10.1353/book.68534"
    # The parser deliberately leaves the MARC Record row out of the result.
    book.info.has_key?("MARC Record").should be_false
  end
end
|
|
@ -0,0 +1,12 @@
|
|||
require "./infoparser.cr"
|
||||
|
||||
module Muse::Dl
  # A book page, reduced to the label/value pairs of its details table.
  class Book
    # Label => value pairs produced by `InfoParser.parse` for this page.
    getter info : Hash(String, String)

    # Builds the book from the raw page markup; *html* is handed to
    # `InfoParser.parse` unchanged.
    def initialize(html : String)
      @info = InfoParser.parse(html)
    end
  end
end
|
|
@ -0,0 +1,20 @@
|
|||
require "myhtml"
|
||||
|
||||
module Muse::Dl
  # Turns the `.details_row` entries of a page into a label => value hash.
  class InfoParser
    # Parses *html* and returns a `Hash(String, String)` mapping the stripped
    # text of each row's first `.cell` (the label) to the stripped text of
    # its second `.cell` (the value).
    #
    # Rows labelled "MARC Record" are left out. Rows with fewer than two
    # `.cell` elements are skipped rather than raising `IndexError`.
    def self.parse(html : String)
      info = Hash(String, String).new
      document = Myhtml::Parser.new(html)

      document.css(".details_row").each do |row|
        # Query the row once and reuse the result for both label and value.
        cells = row.css(".cell").map(&.inner_text.strip).to_a

        # Without both a label cell and a value cell there is no pair to store.
        next if cells.size < 2

        label, value = cells[0], cells[1]

        # The MARC Record row is intentionally not part of the result.
        next if label == "MARC Record"

        info[label] = value
      end

      info
    end
  end
end
|
|
@ -0,0 +1,12 @@
|
|||
require "./infoparser.cr"
|
||||
|
||||
module Muse::Dl
  # A journal page, reduced to the label/value pairs of its details table.
  class Journal
    # Label => value pairs produced by `InfoParser.parse` for this page.
    getter info : Hash(String, String)

    # Builds the journal from the raw page markup; *html* is handed to
    # `InfoParser.parse` unchanged.
    def initialize(html : String)
      @info = InfoParser.parse(html)
    end
  end
end
|
Loading…
Reference in New Issue