diff --git a/spec/fetch_spec.cr b/spec/fetch_spec.cr index c01ed8f..abe1a72 100644 --- a/spec/fetch_spec.cr +++ b/spec/fetch_spec.cr @@ -1,4 +1,5 @@ require "./spec_helper" +require "webmock" # require "errors/muse_corrupt_pdf.cr" describe Muse::Dl::Book do diff --git a/src/fetch.cr b/src/fetch.cr index 6bc987a..3f581d8 100644 --- a/src/fetch.cr +++ b/src/fetch.cr @@ -63,7 +63,6 @@ module Muse::Dl content_type = response.headers["Content-Type"] if content_type.is_a? String if /html/.match content_type - puts response response.body.each_line do |line| # https://muse.jhu.edu/chapter/2383438/pdf # https://muse.jhu.edu/book/67393 diff --git a/src/issue.cr b/src/issue.cr index aad8b20..2e826f7 100644 --- a/src/issue.cr +++ b/src/issue.cr @@ -14,6 +14,7 @@ module Muse::Dl @volume : String | Nil @number : String | Nil @date : String | Nil + @issues : Array(Muse::Dl::Issue) getter :id, :title, :articles, :url, :summary, :publisher, :info, :volume, :number, :date @@ -22,6 +23,7 @@ module Muse::Dl @url = "https://muse.jhu.edu/issue/#{id}" @info = Hash(String, String).new @articles = [] of Muse::Dl::Article + @issues = [] of Muse::Dl::Issue end def parse @@ -32,6 +34,7 @@ module Muse::Dl @summary = InfoParser.summary(h) @publisher = InfoParser.journal_publisher(h) parse_title + parse_contents(h) end def parse_title @@ -42,5 +45,16 @@ module Muse::Dl @date = /((January|February|March|April|May|June|July|August|September|October|November|December) (\d+))/.match(t).try &.[1] end end + + def parse_contents(myhtml : Myhtml::Parser) + myhtml.css("#available_issues_list_text a").each do |a| + link = a.attribute_by("href").to_s + + matches = /\/issue\/(\d+)/.match link + if matches + @issues.push Muse::Dl::Issue.new matches[1] + end + end + end end end diff --git a/src/muse-dl.cr b/src/muse-dl.cr index 9b71f65..28368fc 100644 --- a/src/muse-dl.cr +++ b/src/muse-dl.cr @@ -51,7 +51,7 @@ module Muse::Dl end temp_stitched_file.delete if temp_stitched_file - puts "--dont-strip-first-page was on. Please validate PDF file for any errors." + puts "--dont-strip-first-page was on. Please validate PDF file for any errors." if parser.strip_first puts "DL: #{url}. Saved final output to #{parser.output}" # Cleanup the chapter files