Adds an infobox parser

This commit is contained in:
Nemo 2020-03-29 01:32:55 +05:30
parent e7754344d8
commit df079ceaa0
4 changed files with 140 additions and 0 deletions

96
spec/book_spec.cr Normal file
View File

@ -0,0 +1,96 @@
require "./spec_helper"
# Spec for Muse::Dl::Book: builds a Book from an "Additional Information"
# details table and checks the label/value pairs exposed through `#info`.
describe Muse::Dl::Book do
# Feeds a fixed HTML fixture to Book.new and compares individual `info` entries.
it "it should parse the infobox" do
# Fixture: one `.details_tbl` holding nine `.details_row` entries, each with a
# `.cell.label` followed by a value `.cell`.
html = <<-EOT
<div class="column full">
<div class="title_wrap details">
<h2>Additional Information</h2>
</div>
<div class="details_tbl">
<div class="details_row">
<div class="cell label">
ISBN
</div>
<div class="cell">
9780813928517
</div>
</div>
<div class="details_row">
<div class="cell label">
Related ISBN
</div>
<div class="cell">
9780813928456
</div>
</div>
<div class="details_row">
<div class="cell label">
MARC Record
</div>
<div class="cell">
<a href="https://about.muse.jhu.edu/lib/metadata?filename=muse_book_875&amp;no_auth=1&amp;format=marc&amp;content_ids=book:875">Download</a>
</div>
</div>
<div class="details_row">
<div class="cell label">
OCLC
</div>
<div class="cell">
755633557
</div>
</div>
<div class="details_row">
<div class="cell label">
Pages
</div>
<div class="cell">
432
</div>
</div>
<div class="details_row">
<div class="cell label">
Launched on MUSE
</div>
<div class="cell">
2012-01-01
</div>
</div>
<div class="details_row">
<div class="cell label">
DOI
</div>
<div class="cell">
<a href="https://doi.org/10.1353/book.68534" target="_blank">10.1353/book.68534<img src="/images/link_blue.png" alt="external link" style="vertical-align:top;"></a>
</div>
</div>
<div class="details_row">
<div class="cell label">
Language
</div>
<div class="cell">
English
</div>
</div>
<div class="details_row">
<div class="cell label">
Open Access
</div>
<div class="cell">
No
</div>
</div>
</div>
</div>
EOT
# Book's constructor takes the raw HTML string and populates `info` from it.
book = Muse::Dl::Book.new html
# Expected values carry no surrounding whitespace even though each fixture
# cell puts its text on its own line.
book.info["ISBN"].should eq "9780813928517"
book.info["Related ISBN"].should eq "9780813928456"
book.info["OCLC"].should eq "755633557"
book.info["Pages"].should eq "432"
book.info["Launched on MUSE"].should eq "2012-01-01"
book.info["Language"].should eq "English"
book.info["Open Access"].should eq "No"
# The DOI cell wraps its text in a link with an icon image; only the link
# text is expected as the value.
book.info["DOI"].should eq "10.1353/book.68534"
# NOTE(review): the fixture also contains a "MARC Record" row, but nothing
# here asserts how that row is handled.
end
end

12
src/book.cr Normal file
View File

@ -0,0 +1,12 @@
require "./infoparser.cr"
module Muse::Dl
  # A book page, represented by the label/value pairs that
  # `InfoParser.parse` extracts from the page's HTML.
  class Book
    # Parsed details, keyed by the label text of each row.
    getter info : Hash(String, String)

    # Parses *html* once at construction time and keeps the result in `info`.
    def initialize(html : String)
      @info = InfoParser.parse(html)
    end
  end
end

20
src/infoparser.cr Normal file
View File

@ -0,0 +1,20 @@
require "myhtml"
module Muse::Dl
  # Extracts label/value pairs from the `.details_row` entries of an HTML
  # document.
  class InfoParser
    # Parses *html* and returns a hash mapping each row's label text to its
    # value text, both stripped of surrounding whitespace.
    #
    # Within every `.details_row`, the first `.cell` is treated as the label
    # and the second as the value. Rows with fewer than two cells are skipped
    # instead of raising `IndexError`. The "MARC Record" row is never included.
    # When the same label appears more than once, the last row wins.
    def self.parse(html : String) : Hash(String, String)
      info = Hash(String, String).new
      parser = Myhtml::Parser.new(html)

      parser.css(".details_row").each do |row|
        # Query the row once and reuse the result for both label and value.
        cells = row.css(".cell").map(&.inner_text).to_a
        # A malformed row has no label/value pair to record.
        next if cells.size < 2

        label = cells[0].strip
        # Deliberately excluded; presumably because this cell holds a
        # download link rather than a textual value.
        next if label == "MARC Record"

        info[label] = cells[1].strip
      end

      info
    end
  end
end

12
src/journal.cr Normal file
View File

@ -0,0 +1,12 @@
require "./infoparser.cr"
module Muse::Dl
  # A journal page, represented by the label/value pairs that
  # `InfoParser.parse` extracts from the page's HTML.
  class Journal
    # Parsed details, keyed by the label text of each row.
    getter info : Hash(String, String)

    # Parses *html* once at construction time and keeps the result in `info`.
    def initialize(html : String)
      @info = InfoParser.parse(html)
    end
  end
end