Chapter parsing in place for books

This commit is contained in:
Nemo 2020-03-29 02:32:18 +05:30
parent a8a396006c
commit db6d1c2c82
3 changed files with 45 additions and 2 deletions

View File

@ -29,6 +29,27 @@ describe Muse::Dl::Book do
book.thumbnail_url.should eq "https://muse.jhu.edu/book/875/image/front_cover.jpg?format=180" book.thumbnail_url.should eq "https://muse.jhu.edu/book/875/image/front_cover.jpg?format=180"
end end
# Verifies that Book#chapters yields one [chapter id, title] pair per chapter,
# in the order listed below.
# Titles are compared exactly, so the trailing "\n" on chapters 4 and 5 is
# part of the expected value rather than a typo.
it "should parse the chapters" do
book.chapters.should eq [
["16872", "Cover"],
["16873", "Title Page"],
["16874", "Copyright Page"],
["16875", "Table of Contents"],
["16876", "Acknowledgments"],
["16877", "Introduction"],
["16878", "Chapter 1: A Troubled Gentry"],
["16879", "Chapter 2: Beyond the Plantations"],
["16880", "Chapter 3: The World(s) Northern Neck Slavery Made"],
["16881", "Chapter 4: The Scottish Merchants\n"],
["16882", "Chapter 5: Controlling the Revolution\n"],
["16883", "Chapter 6: The Evangelical Challenge"],
["16884", "Chapter 7: The Preservation of Hegemony"],
["16885", "Notes"],
["16886", "Bibliography"],
["16887", "Index"],
]
end
it "it should parse the DOI for 68534" do it "it should parse the DOI for 68534" do
html = File.new("spec/fixtures/book-68534.html").gets_to_end html = File.new("spec/fixtures/book-68534.html").gets_to_end
book = Muse::Dl::Book.new html book = Muse::Dl::Book.new html

View File

@ -2,5 +2,25 @@ require "./thing.cr"
module Muse::Dl module Muse::Dl
class Book < Muse::Dl::Thing class Book < Muse::Dl::Thing
@chapters : Array(Array(String))
getter :chapters
# Builds a Book from the raw HTML of a book page.
#
# The parent initializer runs first; the chapter list is then extracted
# from @h and stored in @chapters.
def initialize(html : String)
# Must come before parts(@h): @h is read on the next line.
# NOTE(review): @h is presumably assigned by the parent initializer — confirm in thing.cr.
super(html)
@chapters = parts(@h)
end
# Extracts the chapter list from a parsed book page.
#
# Every anchor matched by the ".title a" selector is inspected; anchors whose
# href contains "/chapter/<digits>" contribute one [chapter id, link text]
# pair. The link text is stored exactly as returned by inner_text (it is not
# trimmed). Anchors without a chapter link are ignored.
#
# Returns the pairs in document order.
def parts(myhtml : Myhtml::Parser)
  result = [] of Array(String)
  myhtml.css(".title a").each do |anchor|
    href = anchor.attribute_by("href").to_s
    found = href.match(/\/chapter\/(\d+)/)
    # Skip links that do not point at a chapter.
    next unless found
    result << [found[1], anchor.inner_text]
  end
  result
end
end end
end end

View File

@ -12,11 +12,14 @@ module Muse::Dl
@summary_html : String @summary_html : String
@cover_url : String @cover_url : String
@thumbnail_url : String @thumbnail_url : String
@h : Myhtml::Parser
getter :info, :title, :author, :date, :publisher, :summary, :summary_html, :cover_url, :thumbnail_url getter :info, :title, :author, :date, :publisher, :summary, :summary_html, :cover_url, :thumbnail_url
private getter :h
def initialize(html : String) def initialize(html : String)
h = Myhtml::Parser.new html @h = Myhtml::Parser.new html
@info = InfoParser.infobox(h) @info = InfoParser.infobox(h)
@title = InfoParser.title(h) @title = InfoParser.title(h)
@author = InfoParser.author(h) @author = InfoParser.author(h)
@ -24,7 +27,6 @@ module Muse::Dl
@publisher = InfoParser.publisher(h) @publisher = InfoParser.publisher(h)
@summary = InfoParser.summary(h) @summary = InfoParser.summary(h)
@summary_html = InfoParser.summary_html(h) @summary_html = InfoParser.summary_html(h)
@cover_url = "TODO" @cover_url = "TODO"
@thumbnail_url = "TODO" @thumbnail_url = "TODO"
end end