Split to a Thing class, and add more parsers

This commit is contained in:
Nemo 2020-03-29 02:10:41 +05:30
parent f8debde5c5
commit a8a396006c
5 changed files with 77 additions and 21 deletions

View File

@ -1,9 +1,10 @@
require "./spec_helper"
describe Muse::Dl::Book do
html = File.new("spec/fixtures/book-875.html").gets_to_end
book = Muse::Dl::Book.new html
it "it should parse the infobox for 875" do
html = File.new("spec/fixtures/book-875.html").gets_to_end
book = Muse::Dl::Book.new html
book.info["ISBN"].should eq "9780813928517"
book.info["Related ISBN"].should eq "9780813928456"
book.info["OCLC"].should eq "755633557"
@ -11,6 +12,21 @@ describe Muse::Dl::Book do
book.info["Launched on MUSE"].should eq "2012-01-01"
book.info["Language"].should eq "English"
book.info["Open Access"].should eq "No"
book.title.should eq "Accommodating Revolutions: Virginia's Northern Neck in an Era of Transformations, 1760-1810"
book.author.should eq "Albert H. Tillson, Jr."
book.date.should eq "2010"
book.publisher.should eq "University of Virginia Press"
end
# Verifies the plain-text and HTML variants of the summary for the book-875 fixture.
it "should parse the summary" do
  expected_text = "Accommodating Revolutions addresses a controversy of long standing among historians of eighteenth-century America and Virginia—the extent to which internal conflict and/or consensus characterized the society of the Revolutionary era. In particular, it emphasizes the complex and often self-defeating actions and decisions of dissidents and other non-elite groups. By focusing on a small but significant region, Tillson elucidates the multiple and interrelated sources of conflict that beset Revolutionary Virginia, but also explains why in the end so little changed."
  book.summary.should eq(expected_text)

  # NOTE(review): InfoParser.summary_html currently returns the literal "TODO",
  # so this expectation presumably cannot pass until it is implemented — confirm.
  expected_html = "<u>Accommodating Revolutions </u>addresses a controversy of long standing among historians of eighteenth-century America and Virginia—the extent to which internal conflict and/or consensus characterized the society of the Revolutionary era. In particular, it emphasizes the complex and often self-defeating actions and decisions of dissidents and other non-elite groups. By focusing on a small but significant region, Tillson elucidates the multiple and interrelated sources of conflict that beset Revolutionary Virginia, but also explains why in the end so little changed."
  book.summary_html.should eq(expected_html)
end
# Verifies the full-size and thumbnail cover image URLs for the book-875 fixture.
# NOTE(review): Thing#initialize currently assigns the literal "TODO" to both
# fields, so these expectations presumably fail until cover parsing exists — confirm.
it "should parse the cover" do
  expected_cover = "https://muse.jhu.edu/book/875/image/front_cover.jpg"
  expected_thumbnail = "https://muse.jhu.edu/book/875/image/front_cover.jpg?format=180"

  book.cover_url.should eq(expected_cover)
  book.thumbnail_url.should eq(expected_thumbnail)
end
it "it should parse the DOI for 68534" do

View File

@ -1,13 +1,6 @@
require "./infoparser.cr"
require "myhtml"
require "./thing.cr"
module Muse::Dl
class Book
@info = Hash(String, String).new
getter :info
def initialize(html : String)
@info = InfoParser.infobox(Myhtml::Parser.new html)
end
class Book < Muse::Dl::Thing
end
end

View File

@ -16,7 +16,29 @@ module Muse::Dl
return info
end
def self.title
# Returns the whitespace-stripped text of the first node matching
# `#book_about_info .title`. Indexing with `[0]` raises if nothing matches.
def self.title(myhtml : Myhtml::Parser)
  title_texts = myhtml.css("#book_about_info .title").map { |node| node.inner_text }.to_a
  title_texts[0].strip
end
# Returns the whitespace-stripped text of the first node matching
# `#book_about_info .author`. Indexing with `[0]` raises if nothing matches.
def self.author(myhtml : Myhtml::Parser)
  author_texts = myhtml.css("#book_about_info .author").map { |node| node.inner_text }.to_a
  author_texts[0].strip
end
# Returns the whitespace-stripped text of the first node matching
# `#book_about_info .date`. Indexing with `[0]` raises if nothing matches.
def self.date(myhtml : Myhtml::Parser)
  date_texts = myhtml.css("#book_about_info .date").map { |node| node.inner_text }.to_a
  date_texts[0].strip
end
# Returns the whitespace-stripped text of the first link found under
# `#book_about_info .pub`. Indexing with `[0]` raises if nothing matches.
def self.publisher(myhtml : Myhtml::Parser)
  publisher_texts = myhtml.css("#book_about_info .pub a").map { |node| node.inner_text }.to_a
  publisher_texts[0].strip
end
# Returns the whitespace-stripped text of the first node matching
# `#book_about_info .card_summary`. Indexing with `[0]` raises if nothing matches.
def self.summary(myhtml : Myhtml::Parser)
  summary_texts = myhtml.css("#book_about_info .card_summary").map { |node| node.inner_text }.to_a
  summary_texts[0].strip
end
# Intended to return the summary of the page as HTML; not implemented yet.
# Currently ignores *myhtml* and always returns the placeholder string "TODO".
def self.summary_html(myhtml : Myhtml::Parser)
# Placeholder result: the unconditional return makes the line below unreachable.
return "TODO"
# Draft implementation kept for reference; never executed.
# NOTE(review): `tag_text` presumably yields text content rather than markup — confirm
# against the Myhtml API before enabling this line.
myhtml.css("#book_about_info .card_summary").map(&.tag_text).to_a[0].strip
end
end
end

View File

@ -1,13 +1,6 @@
require "./infoparser.cr"
require "myhtml"
require "./thing.cr"
module Muse::Dl
class Journal
@info = Hash(String, String).new
getter :info
def initialize(html : String)
@info = InfoParser.infobox(Myhtml::Parser.new html)
end
class Journal < Muse::Dl::Thing
end
end

32
src/thing.cr Normal file
View File

@ -0,0 +1,32 @@
require "./infoparser.cr"
require "myhtml"
module Muse::Dl
  # Holds the metadata that InfoParser extracts from a single HTML document.
  # All fields are filled in once by the constructor and exposed read-only.
  class Thing
    # Key/value pairs produced by InfoParser.infobox.
    getter info = Hash(String, String).new
    getter title : String
    getter author : String
    getter date : String
    getter publisher : String
    getter summary : String
    getter summary_html : String
    # The two image URLs are not parsed yet; the constructor stores "TODO" in both.
    getter cover_url : String
    getter thumbnail_url : String

    # Parses *html* once and passes the same parser instance to every
    # InfoParser extractor.
    def initialize(html : String)
      document = Myhtml::Parser.new(html)

      @info = InfoParser.infobox(document)
      @title = InfoParser.title(document)
      @author = InfoParser.author(document)
      @date = InfoParser.date(document)
      @publisher = InfoParser.publisher(document)
      @summary = InfoParser.summary(document)
      @summary_html = InfoParser.summary_html(document)

      # Placeholders until cover/thumbnail extraction is implemented.
      @cover_url = "TODO"
      @thumbnail_url = "TODO"
    end
  end
end