diff --git a/spec/book_spec.cr b/spec/book_spec.cr index 0a94bef..d16425b 100644 --- a/spec/book_spec.cr +++ b/spec/book_spec.cr @@ -17,6 +17,8 @@ describe Muse::Dl::Book do book.author.should eq "Albert H. Tillson, Jr." book.date.should eq "2010" book.publisher.should eq "University of Virginia Press" + book.formats.should contain :pdf + book.formats.should_not contain :html end it "should parse the summary" do @@ -50,7 +52,7 @@ describe Muse::Dl::Book do ] end - it "it should parse the DOI for 68534" do + it "should parse book/68534" do html = File.new("spec/fixtures/book-68534.html").gets_to_end book = Muse::Dl::Book.new html book.info["ISBN"].should eq "9781501737695" @@ -60,5 +62,14 @@ describe Muse::Dl::Book do book.info["Language"].should eq "English" book.info["Open Access"].should eq "Yes" book.info["DOI"].should eq "10.1353/book.68534" + book.formats.should contain :html + book.formats.should_not contain :pdf + end + + it "should note both formats for book/60322" do + html = File.new("spec/fixtures/book-60322.html").gets_to_end + book = Muse::Dl::Book.new html + book.formats.should contain :pdf + book.formats.should contain :html end end diff --git a/spec/fixtures/book-60322.html b/spec/fixtures/book-60322.html new file mode 100644 index 0000000..b1b9287 --- /dev/null +++ b/spec/fixtures/book-60322.html @@ -0,0 +1,2506 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Project MUSE - Policing Democracy + + + + + + + + + + + + + + + + + + + + + + + + + + + Article + + + + + + + + + + + + + + + +
+ +
+ +
+ + +
+
+
+ + + + + +
+

In this Book

+
+ +
+
+
+
+ + Policing Democracy + + + +
+
+ + + + + buy this book + Buy This Book in Print + + + + +
+ +
+
+ + +
+ +
+
+
+
+

Table of Contents

+
+
+ +
+
+
+ + + + +
+
+
    + +
  1. Cover
  2. + + + + + + + +
  3. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  4. + + + +
+
+
+ + + + +
+
+
    + +
  1. Title Page
  2. + + + + + + + +
  3. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  4. + + + +
+
+
+ + + + +
+
+
    + +
  1. Copyright Page
  2. + + + + + + + +
  3. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  4. + + + +
+
+
+ + + + +
+
+
    + +
  1. Contents
  2. + +
  3. pp. vii-viii
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. List of Figures, Maps, and Tables
  2. + +
  3. pp. ix-x
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. Acronyms and Abbreviations
  2. + +
  3. pp. xi-xviii
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. Preface
  2. + +
  3. pp. xix-xxiv
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. 1. Introduction
  2. + +
  3. pp. 1-20
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. 2. Realms of Change and Obstacles to Citizen Security Reform
  2. + +
  3. pp. 21-68
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. 3. Citizen Security and Democracy
  2. + +
  3. pp. 69-104
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. 4. Honduras
  2. + +
  3. pp. 105-173
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. 5. Bolivia
  2. + +
  3. pp. 174-232
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. 6. Argentina
  2. + +
  3. pp. 233-297
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. 7. Overcoming Obstacles to Reform
  2. + +
  3. pp. 298-323
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. 8. Conclusion
  2. + +
  3. pp. 324-330
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. Appendix A: National Homicide Rates, 1995–2009
  2. + +
  3. pp. 331-336
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. Appendix B: Citizen Security Structures and Police Ranks
  2. + +
  3. pp. 337-340
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. Glossary
  2. + +
  3. pp. 341-342
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. References
  2. + +
  3. pp. 343-366
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + + + +
+
+
    + +
  1. Index
  2. + +
  3. pp. 367-389
  4. + + + + + +
  5. + + + + + +
    + + +open access + + + + +
    + + + + +
    + +
  6. + + + +
+
+
+ + +
+ + + + +
+
+ +
+
+ +
+
+ +
+
+
+

Additional Information

+
+
+ + + +
+
+ ISBN +
+
+ 9781421428147 +
+
+ + +
+
+ Related ISBN +
+
+ 9781421429403 +
+
+ + +
+
+ DOI +
+ +
+ + +
+
+ MARC Record +
+
+ Download +
+
+ + +
+
+ OCLC +
+
+ 1049797471 +
+
+ + + +
+
+ Launched on MUSE +
+
+ 2018-08-29 +
+
+ + + +
+
+ Language +
+
+ English +
+
+ + +
+
+ Open Access +
+
+ + Yes + +
+
+ + + + +
+
+
+ + + + + + + +
+
+ +
+ + + + + + + + + +
+ Back To Top +
+ + + + + + + +
+

This website uses cookies to ensure you get the best experience on our website. Without cookies your experience may not be seamless.

+ + +
+ + + + + + + + + + + + + diff --git a/src/fetch.cr b/src/fetch.cr index 41973b3..94296f4 100644 --- a/src/fetch.cr +++ b/src/fetch.cr @@ -25,7 +25,7 @@ module Muse::Dl return end - url = "https://muse.jhu.edu/chapter/#{chapter_id}" + url = "https://muse.jhu.edu/chapter/#{chapter_id}/pdf" headers = HEADERS.merge({ "Referer" => "https://muse.jhu.edu/verify?url=%2Fchapter%2F#{chapter_id}%2Fpdf", }) diff --git a/src/infoparser.cr b/src/infoparser.cr index d7c9b1b..06eb43f 100644 --- a/src/infoparser.cr +++ b/src/infoparser.cr @@ -1,5 +1,12 @@ require "myhtml" +# https://github.com/kostya/myhtml/issues/19 +struct Myhtml::Node + def inner_html + String.build { |buf| children.each &.to_html(buf) } + end +end + module Muse::Dl class InfoParser def self.infobox(myhtml : Myhtml::Parser) @@ -16,6 +23,13 @@ module Muse::Dl return info end + def self.id(myhtml : Myhtml::Parser) + searchid = myhtml.css("#search_within_book_id").first + if searchid + searchid.attribute_by("value") + end + end + def self.title(myhtml : Myhtml::Parser) myhtml.css("#book_about_info .title").map(&.inner_text).to_a[0].strip end @@ -42,8 +56,24 @@ module Muse::Dl end def self.summary_html(myhtml : Myhtml::Parser) - return "TODO" - myhtml.css("#book_about_info .card_summary").map(&.tag_text).to_a[0].strip + summary_div = myhtml.css("#book_about_info .card_summary") + begin + summary_div.first.inner_html + rescue e : Exception + "NA" + end + end + + def self.formats(myhtml : Myhtml::Parser) + formats = Set(Symbol).new + myhtml.css("img.icon").each do |icon| + url = icon.attribute_by("src") + if url + formats.add :html if /html/i.match url + formats.add :pdf if /pdf/i.match url + end + end + formats end end end diff --git a/src/thing.cr b/src/thing.cr index 663b958..e199956 100644 --- a/src/thing.cr +++ b/src/thing.cr @@ -13,22 +13,26 @@ module Muse::Dl @cover_url : String @thumbnail_url : String @h : Myhtml::Parser + @formats : Set(Symbol) - getter :info, :title, :author, :date, :publisher, :summary, :summary_html, :cover_url, :thumbnail_url + getter :info, :title, :author, :date, :publisher, :summary, :summary_html, :cover_url, :thumbnail_url, :formats private getter :h def initialize(html : String) @h = Myhtml::Parser.new html @info = InfoParser.infobox(h) + id : String | Nil = InfoParser.id(h) @title = InfoParser.title(h) @author = InfoParser.author(h) @date = InfoParser.date(h) @publisher = InfoParser.publisher(h) @summary = InfoParser.summary(h) @summary_html = InfoParser.summary_html(h) - @cover_url = "TODO" - @thumbnail_url = "TODO" + @formats = InfoParser.formats(h) + # TODO: Make this work for journals as well + @cover_url = "https://muse.jhu.edu/book/#{id}/image/front_cover.jpg" + @thumbnail_url = "https://muse.jhu.edu/book/#{id}/image/front_cover.jpg?format=180" end end end