From a8a396006c24f4a7513e122c9ca1e7b6fd74736f Mon Sep 17 00:00:00 2001 From: Nemo Date: Sun, 29 Mar 2020 02:10:41 +0530 Subject: [PATCH] Split to a Thing class, and add more parsers --- spec/book_spec.cr | 20 ++++++++++++++++++-- src/book.cr | 11 ++--------- src/infoparser.cr | 24 +++++++++++++++++++++++- src/journal.cr | 11 ++--------- src/thing.cr | 32 ++++++++++++++++++++++++++++++++ 5 files changed, 77 insertions(+), 21 deletions(-) create mode 100644 src/thing.cr diff --git a/spec/book_spec.cr b/spec/book_spec.cr index 1e33a4e..9feb6e8 100644 --- a/spec/book_spec.cr +++ b/spec/book_spec.cr @@ -1,9 +1,10 @@ require "./spec_helper" describe Muse::Dl::Book do + html = File.new("spec/fixtures/book-875.html").gets_to_end + book = Muse::Dl::Book.new html + it "it should parse the infobox for 875" do - html = File.new("spec/fixtures/book-875.html").gets_to_end - book = Muse::Dl::Book.new html book.info["ISBN"].should eq "9780813928517" book.info["Related ISBN"].should eq "9780813928456" book.info["OCLC"].should eq "755633557" @@ -11,6 +12,21 @@ describe Muse::Dl::Book do book.info["Launched on MUSE"].should eq "2012-01-01" book.info["Language"].should eq "English" book.info["Open Access"].should eq "No" + + book.title.should eq "Accommodating Revolutions: Virginia's Northern Neck in an Era of Transformations, 1760-1810" + book.author.should eq "Albert H. Tillson, Jr." + book.date.should eq "2010" + book.publisher.should eq "University of Virginia Press" + end + + it "should parse the summary" do + book.summary.should eq "Accommodating Revolutions addresses a controversy of long standing among historians of eighteenth-century America and Virginia—the extent to which internal conflict and/or consensus characterized the society of the Revolutionary era. In particular, it emphasizes the complex and often self-defeating actions and decisions of dissidents and other non-elite groups. By focusing on a small but significant region, Tillson elucidates the multiple and interrelated sources of conflict that beset Revolutionary Virginia, but also explains why in the end so little changed." + book.summary_html.should eq "Accommodating Revolutions addresses a controversy of long standing among historians of eighteenth-century America and Virginia—the extent to which internal conflict and/or consensus characterized the society of the Revolutionary era. In particular, it emphasizes the complex and often self-defeating actions and decisions of dissidents and other non-elite groups. By focusing on a small but significant region, Tillson elucidates the multiple and interrelated sources of conflict that beset Revolutionary Virginia, but also explains why in the end so little changed." + end + + it "should parse the cover" do + book.cover_url.should eq "https://muse.jhu.edu/book/875/image/front_cover.jpg" + book.thumbnail_url.should eq "https://muse.jhu.edu/book/875/image/front_cover.jpg?format=180" end it "it should parse the DOI for 68534" do diff --git a/src/book.cr b/src/book.cr index 0c47696..90cce6b 100644 --- a/src/book.cr +++ b/src/book.cr @@ -1,13 +1,6 @@ -require "./infoparser.cr" -require "myhtml" +require "./thing.cr" module Muse::Dl - class Book - @info = Hash(String, String).new - getter :info - - def initialize(html : String) - @info = InfoParser.infobox(Myhtml::Parser.new html) - end + class Book < Muse::Dl::Thing end end diff --git a/src/infoparser.cr b/src/infoparser.cr index dbbf538..f428105 100644 --- a/src/infoparser.cr +++ b/src/infoparser.cr @@ -16,7 +16,29 @@ module Muse::Dl return info end - def self.title + def self.title(myhtml : Myhtml::Parser) + myhtml.css("#book_about_info .title").map(&.inner_text).to_a[0].strip + end + + def self.author(myhtml : Myhtml::Parser) + myhtml.css("#book_about_info .author").map(&.inner_text).to_a[0].strip + end + + def self.date(myhtml : Myhtml::Parser) + myhtml.css("#book_about_info .date").map(&.inner_text).to_a[0].strip + end + + def self.publisher(myhtml : Myhtml::Parser) + myhtml.css("#book_about_info .pub a").map(&.inner_text).to_a[0].strip + end + + def self.summary(myhtml : Myhtml::Parser) + myhtml.css("#book_about_info .card_summary").map(&.inner_text).to_a[0].strip + end + + def self.summary_html(myhtml : Myhtml::Parser) + return "TODO" + myhtml.css("#book_about_info .card_summary").map(&.tag_text).to_a[0].strip end end end diff --git a/src/journal.cr b/src/journal.cr index 60d3ecd..1f3323a 100644 --- a/src/journal.cr +++ b/src/journal.cr @@ -1,13 +1,6 @@ -require "./infoparser.cr" -require "myhtml" +require "./thing.cr" module Muse::Dl - class Journal - @info = Hash(String, String).new - getter :info - - def initialize(html : String) - @info = InfoParser.infobox(Myhtml::Parser.new html) - end + class Journal < Muse::Dl::Thing end end diff --git a/src/thing.cr b/src/thing.cr new file mode 100644 index 0000000..1787180 --- /dev/null +++ b/src/thing.cr @@ -0,0 +1,32 @@ +require "./infoparser.cr" +require "myhtml" + +module Muse::Dl + class Thing + @info = Hash(String, String).new + @title : String + @author : String + @date : String + @publisher : String + @summary : String + @summary_html : String + @cover_url : String + @thumbnail_url : String + + getter :info, :title, :author, :date, :publisher, :summary, :summary_html, :cover_url, :thumbnail_url + + def initialize(html : String) + h = Myhtml::Parser.new html + @info = InfoParser.infobox(h) + @title = InfoParser.title(h) + @author = InfoParser.author(h) + @date = InfoParser.date(h) + @publisher = InfoParser.publisher(h) + @summary = InfoParser.summary(h) + @summary_html = InfoParser.summary_html(h) + + @cover_url = "TODO" + @thumbnail_url = "TODO" + end + end +end