mirror of https://github.com/captn3m0/muse-dl.git
Minor fixes, parse contents for issues
This commit is contained in:
parent
aa392eaa64
commit
04a2fe52ec
|
@ -1,4 +1,5 @@
|
||||||
require "./spec_helper"
|
require "./spec_helper"
|
||||||
|
require "webmock"
|
||||||
# require "errors/muse_corrupt_pdf.cr"
|
# require "errors/muse_corrupt_pdf.cr"
|
||||||
|
|
||||||
describe Muse::Dl::Book do
|
describe Muse::Dl::Book do
|
||||||
|
|
|
@ -63,7 +63,6 @@ module Muse::Dl
|
||||||
content_type = response.headers["Content-Type"]
|
content_type = response.headers["Content-Type"]
|
||||||
if content_type.is_a? String
|
if content_type.is_a? String
|
||||||
if /html/.match content_type
|
if /html/.match content_type
|
||||||
puts response
|
|
||||||
response.body.each_line do |line|
|
response.body.each_line do |line|
|
||||||
# https://muse.jhu.edu/chapter/2383438/pdf
|
# https://muse.jhu.edu/chapter/2383438/pdf
|
||||||
# https://muse.jhu.edu/book/67393
|
# https://muse.jhu.edu/book/67393
|
||||||
|
|
14
src/issue.cr
14
src/issue.cr
|
@ -14,6 +14,7 @@ module Muse::Dl
|
||||||
@volume : String | Nil
|
@volume : String | Nil
|
||||||
@number : String | Nil
|
@number : String | Nil
|
||||||
@date : String | Nil
|
@date : String | Nil
|
||||||
|
@issues : Array(Muse::Dl::Issue)
|
||||||
|
|
||||||
getter :id, :title, :articles, :url, :summary, :publisher, :info, :volume, :number, :date
|
getter :id, :title, :articles, :url, :summary, :publisher, :info, :volume, :number, :date
|
||||||
|
|
||||||
|
@ -22,6 +23,7 @@ module Muse::Dl
|
||||||
@url = "https://muse.jhu.edu/issue/#{id}"
|
@url = "https://muse.jhu.edu/issue/#{id}"
|
||||||
@info = Hash(String, String).new
|
@info = Hash(String, String).new
|
||||||
@articles = [] of Muse::Dl::Article
|
@articles = [] of Muse::Dl::Article
|
||||||
|
@issues = [] of Muse::Dl::Issue
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse
|
def parse
|
||||||
|
@ -32,6 +34,7 @@ module Muse::Dl
|
||||||
@summary = InfoParser.summary(h)
|
@summary = InfoParser.summary(h)
|
||||||
@publisher = InfoParser.journal_publisher(h)
|
@publisher = InfoParser.journal_publisher(h)
|
||||||
parse_title
|
parse_title
|
||||||
|
parse_contents(h)
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse_title
|
def parse_title
|
||||||
|
@ -42,5 +45,16 @@ module Muse::Dl
|
||||||
@date = /((January|February|March|April|May|June|July|August|September|October|November|December) (\d+))/.match(t).try &.[1]
|
@date = /((January|February|March|April|May|June|July|August|September|October|November|December) (\d+))/.match(t).try &.[1]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def parse_contents(myhtml : Myhtml::Parser)
|
||||||
|
myhtml.css("#available_issues_list_text a").each do |a|
|
||||||
|
link = a.attribute_by("href").to_s
|
||||||
|
|
||||||
|
matches = /\/issue\/(\d+)/.match link
|
||||||
|
if matches
|
||||||
|
@issues.push Muse::Dl::Issue.new matches[1]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -51,7 +51,7 @@ module Muse::Dl
|
||||||
end
|
end
|
||||||
|
|
||||||
temp_stitched_file.delete if temp_stitched_file
|
temp_stitched_file.delete if temp_stitched_file
|
||||||
puts "--dont-strip-first-page was on. Please validate PDF file for any errors."
|
puts "--dont-strip-first-page was on. Please validate PDF file for any errors." if parser.strip_first
|
||||||
puts "DL: #{url}. Saved final output to #{parser.output}"
|
puts "DL: #{url}. Saved final output to #{parser.output}"
|
||||||
|
|
||||||
# Cleanup the chapter files
|
# Cleanup the chapter files
|
||||||
|
|
Loading…
Reference in New Issue