mirror of https://github.com/captn3m0/muse-dl
Chapter parsing in place for books
parent
a8a396006c
commit
db6d1c2c82
|
@ -29,6 +29,27 @@ describe Muse::Dl::Book do
|
|||
book.thumbnail_url.should eq "https://muse.jhu.edu/book/875/image/front_cover.jpg?format=180"
|
||||
end
|
||||
|
||||
# Verifies that `book.chapters` equals the exact ordered list below.
# Each entry is a two-element array of strings: [chapter id, chapter title].
# `book` is defined outside this example (not visible in this hunk).
# NOTE: the titles for "16881" and "16882" end in "\n"; the expectation is
# written against the untrimmed title text, so those newlines are intentional here.
it "should parse the chapters" do
|
||||
book.chapters.should eq [
|
||||
["16872", "Cover"],
|
||||
["16873", "Title Page"],
|
||||
["16874", "Copyright Page"],
|
||||
["16875", "Table of Contents"],
|
||||
["16876", "Acknowledgments"],
|
||||
["16877", "Introduction"],
|
||||
["16878", "Chapter 1: A Troubled Gentry"],
|
||||
["16879", "Chapter 2: Beyond the Plantations"],
|
||||
["16880", "Chapter 3: The World(s) Northern Neck Slavery Made"],
|
||||
["16881", "Chapter 4: The Scottish Merchants\n"],
|
||||
["16882", "Chapter 5: Controlling the Revolution\n"],
|
||||
["16883", "Chapter 6: The Evangelical Challenge"],
|
||||
["16884", "Chapter 7: The Preservation of Hegemony"],
|
||||
["16885", "Notes"],
|
||||
["16886", "Bibliography"],
|
||||
["16887", "Index"],
|
||||
|
||||
]
|
||||
end
|
||||
|
||||
it "it should parse the DOI for 68534" do
|
||||
html = File.new("spec/fixtures/book-68534.html").gets_to_end
|
||||
book = Muse::Dl::Book.new html
|
||||
|
|
20
src/book.cr
20
src/book.cr
|
@ -2,5 +2,25 @@ require "./thing.cr"
|
|||
|
||||
module Muse::Dl
|
||||
# A Thing that additionally exposes the list of chapters found in its HTML.
class Book < Muse::Dl::Thing
|
||||
# Chapter entries, each a two-element array: [chapter id, link text].
@chapters : Array(Array(String))
|
||||
|
||||
getter :chapters
|
||||
|
||||
# Builds a Book from raw HTML.
# `super` handles the shared parsing; the chapter list is then collected
# from `@h` (NOTE(review): presumably the Myhtml::Parser assigned by the
# parent initializer - confirm against thing.cr).
def initialize(html : String)
|
||||
super(html)
|
||||
@chapters = parts(@h)
|
||||
end
|
||||
|
||||
# Collects chapter links from a parsed document.
#
# Looks at every node matched by the CSS selector ".title a" and keeps
# those whose href contains "/chapter/<digits>". For each kept node it
# records [<digits>, inner_text], in the order the selector yields them.
# The inner text is stored as-is (it is not stripped).
#
# Returns an Array(Array(String)); empty when nothing matches.
def parts(myhtml : Myhtml::Parser)
|
||||
chapters = [] of Array(String)
|
||||
myhtml.css(".title a").each do |a|
|
||||
# to_s so the regex below always receives a String
# (NOTE(review): presumably attribute_by returns nil when href is absent - confirm).
link = a.attribute_by("href").to_s
|
||||
# Capture group 1 is the numeric chapter id from the URL.
matches = /\/chapter\/(\d+)/.match link
|
||||
# Anchors that do not point at a chapter URL are skipped.
if matches
|
||||
chapters.push [matches[1], a.inner_text]
|
||||
end
|
||||
end
|
||||
chapters
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -12,11 +12,14 @@ module Muse::Dl
|
|||
@summary_html : String
|
||||
@cover_url : String
|
||||
@thumbnail_url : String
|
||||
@h : Myhtml::Parser
|
||||
|
||||
getter :info, :title, :author, :date, :publisher, :summary, :summary_html, :cover_url, :thumbnail_url
|
||||
|
||||
private getter :h
|
||||
|
||||
def initialize(html : String)
|
||||
h = Myhtml::Parser.new html
|
||||
@h = Myhtml::Parser.new html
|
||||
@info = InfoParser.infobox(h)
|
||||
@title = InfoParser.title(h)
|
||||
@author = InfoParser.author(h)
|
||||
|
@ -24,7 +27,6 @@ module Muse::Dl
|
|||
@publisher = InfoParser.publisher(h)
|
||||
@summary = InfoParser.summary(h)
|
||||
@summary_html = InfoParser.summary_html(h)
|
||||
|
||||
@cover_url = "TODO"
|
||||
@thumbnail_url = "TODO"
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue