mirror of https://github.com/captn3m0/muse-dl.git
Removes input_pdf and initial work on article download
This commit is contained in:
parent
04a2fe52ec
commit
f04e9b799e
12
shard.lock
12
shard.lock
|
@ -1,18 +1,18 @@
|
||||||
version: 1.0
|
version: 2.0
|
||||||
shards:
|
shards:
|
||||||
crest:
|
crest:
|
||||||
github: mamantoha/crest
|
git: https://github.com/mamantoha/crest.git
|
||||||
version: 0.25.1
|
version: 0.25.1
|
||||||
|
|
||||||
http-client-digest_auth:
|
http-client-digest_auth:
|
||||||
github: mamantoha/http-client-digest_auth
|
git: https://github.com/mamantoha/http-client-digest_auth.git
|
||||||
version: 0.4.0
|
version: 0.4.0
|
||||||
|
|
||||||
myhtml:
|
myhtml:
|
||||||
github: kostya/myhtml
|
git: https://github.com/kostya/myhtml.git
|
||||||
version: 1.5.1
|
version: 1.5.1
|
||||||
|
|
||||||
webmock:
|
webmock:
|
||||||
github: manastech/webmock.cr
|
git: https://github.com/manastech/webmock.cr.git
|
||||||
commit: bb3eab30f6c7d1fdc0a7ff14cd136d68e860d1a7
|
version: 0.13.0+git.commit.bb3eab30f6c7d1fdc0a7ff14cd136d68e860d1a7
|
||||||
|
|
||||||
|
|
|
@ -3,5 +3,11 @@ require "./issue.cr"
|
||||||
|
|
||||||
module Muse::Dl
|
module Muse::Dl
|
||||||
class Article
|
class Article
|
||||||
|
@id : String
|
||||||
|
|
||||||
|
def initialize(id : String)
|
||||||
|
@id = id
|
||||||
|
@url = "https://muse.jhu.edu/article/#{id}"
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -98,7 +98,7 @@ module Muse::Dl
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.get_info(url : String)
|
def self.get_info(url : String)
|
||||||
match = /https:\/\/muse.jhu.edu\/(book|journal)\/(\d+)/.match url
|
match = /https:\/\/muse.jhu.edu\/(book|journal|issue|article)\/(\d+)/.match url
|
||||||
if match
|
if match
|
||||||
begin
|
begin
|
||||||
response = Crest.get(url).to_s
|
response = Crest.get(url).to_s
|
||||||
|
@ -107,6 +107,10 @@ module Muse::Dl
|
||||||
return Muse::Dl::Book.new response
|
return Muse::Dl::Book.new response
|
||||||
when "journal"
|
when "journal"
|
||||||
return Muse::Dl::Journal.new response
|
return Muse::Dl::Journal.new response
|
||||||
|
when "issue"
|
||||||
|
return Muse::Dl::Issue.new response
|
||||||
|
when "article"
|
||||||
|
return Muse::Dl::Article.new match[2]
|
||||||
end
|
end
|
||||||
rescue ex : Crest::NotFound
|
rescue ex : Crest::NotFound
|
||||||
raise Muse::Dl::Errors::InvalidLink.new("Error - could not download url: #{url}")
|
raise Muse::Dl::Errors::InvalidLink.new("Error - could not download url: #{url}")
|
||||||
|
|
|
@ -30,25 +30,20 @@ module Muse::Dl
|
||||||
temp_stitched_file = nil
|
temp_stitched_file = nil
|
||||||
pdf_builder = Pdftk.new(parser.tmp)
|
pdf_builder = Pdftk.new(parser.tmp)
|
||||||
|
|
||||||
unless parser.input_pdf
|
# Save each chapter
|
||||||
# Save each chapter
|
thing.chapters.each do |chapter|
|
||||||
thing.chapters.each do |chapter|
|
begin
|
||||||
begin
|
Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks, parser.strip_first)
|
||||||
Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks, parser.strip_first)
|
rescue e : Muse::Dl::Errors::MuseCorruptPDF
|
||||||
rescue e : Muse::Dl::Errors::MuseCorruptPDF
|
STDERR.puts "Got a 'Unable to construct chapter PDF' error from MUSE, skipping: #{url}"
|
||||||
STDERR.puts "Got a 'Unable to construct chapter PDF' error from MUSE, skipping: #{url}"
|
return
|
||||||
return
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
chapter_ids = thing.chapters.map { |c| c[0] }
|
|
||||||
|
|
||||||
# Stitch the PDFs together
|
|
||||||
temp_stitched_file = pdf_builder.stitch chapter_ids
|
|
||||||
pdf_builder.add_metadata(temp_stitched_file, parser.output, thing)
|
|
||||||
else
|
|
||||||
x = parser.input_pdf
|
|
||||||
pdf_builder.add_metadata(File.open(x), parser.output, thing) if x
|
|
||||||
end
|
end
|
||||||
|
chapter_ids = thing.chapters.map { |c| c[0] }
|
||||||
|
|
||||||
|
# Stitch the PDFs together
|
||||||
|
temp_stitched_file = pdf_builder.stitch chapter_ids
|
||||||
|
pdf_builder.add_metadata(temp_stitched_file, parser.output, thing)
|
||||||
|
|
||||||
temp_stitched_file.delete if temp_stitched_file
|
temp_stitched_file.delete if temp_stitched_file
|
||||||
puts "--dont-strip-first-page was on. Please validate PDF file for any errors." if parser.strip_first
|
puts "--dont-strip-first-page was on. Please validate PDF file for any errors." if parser.strip_first
|
||||||
|
@ -60,6 +55,8 @@ module Muse::Dl
|
||||||
Fetch.cleanup(parser.tmp, c[0])
|
Fetch.cleanup(parser.tmp, c[0])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
elsif thing.is_a? Muse::Dl::Article
|
||||||
|
puts(thing)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,6 @@ module Muse::Dl
|
||||||
@strip_first = true
|
@strip_first = true
|
||||||
@output = DEFAULT_FILE_NAME
|
@output = DEFAULT_FILE_NAME
|
||||||
@url : String | Nil
|
@url : String | Nil
|
||||||
@input_pdf : String | Nil
|
|
||||||
@clobber = false
|
@clobber = false
|
||||||
@input_list : String | Nil
|
@input_list : String | Nil
|
||||||
@cookie : String | Nil
|
@cookie : String | Nil
|
||||||
|
@ -18,7 +17,7 @@ module Muse::Dl
|
||||||
|
|
||||||
DEFAULT_FILE_NAME = "tempfilename.pdf"
|
DEFAULT_FILE_NAME = "tempfilename.pdf"
|
||||||
|
|
||||||
getter :bookmarks, :tmp, :cleanup, :output, :url, :input_pdf, :clobber, :input_list, :cookie, :strip_first
|
getter :bookmarks, :tmp, :cleanup, :output, :url, :clobber, :input_list, :cookie, :strip_first
|
||||||
setter :url
|
setter :url
|
||||||
|
|
||||||
# Update the output filename unless we have a custom one passed
|
# Update the output filename unless we have a custom one passed
|
||||||
|
@ -41,7 +40,6 @@ module Muse::Dl
|
||||||
|
|
||||||
def initialize(arg : Array(String) = [] of String)
|
def initialize(arg : Array(String) = [] of String)
|
||||||
@tmp = Dir.tempdir
|
@tmp = Dir.tempdir
|
||||||
@input_pdf = nil
|
|
||||||
|
|
||||||
parser = OptionParser.new
|
parser = OptionParser.new
|
||||||
parser.banner = <<-EOT
|
parser.banner = <<-EOT
|
||||||
|
@ -56,7 +54,6 @@ module Muse::Dl
|
||||||
parser.on(long_flag = "--tmp-dir PATH", description = "Temporary Directory to use") { |path| @tmp = path }
|
parser.on(long_flag = "--tmp-dir PATH", description = "Temporary Directory to use") { |path| @tmp = path }
|
||||||
parser.on(long_flag = "--output FILE", description = "Output Filename") { |file| @output = file }
|
parser.on(long_flag = "--output FILE", description = "Output Filename") { |file| @output = file }
|
||||||
parser.on(long_flag = "--no-bookmarks", description = "Don't add bookmarks in the PDF") { @bookmarks = false }
|
parser.on(long_flag = "--no-bookmarks", description = "Don't add bookmarks in the PDF") { @bookmarks = false }
|
||||||
parser.on(long_flag = "--input-pdf INPUT", description = "Input Stitched PDF. Will not download anything") { |input| @input_pdf = input }
|
|
||||||
parser.on(long_flag = "--clobber", description = "Overwrite the output file, if it already exists. Not compatible with input-pdf") { @clobber = true }
|
parser.on(long_flag = "--clobber", description = "Overwrite the output file, if it already exists. Not compatible with input-pdf") { @clobber = true }
|
||||||
parser.on(long_flag = "--dont-strip-first-page", description = "Disables first page from being stripped. Use carefully") { @strip_first = false }
|
parser.on(long_flag = "--dont-strip-first-page", description = "Disables first page from being stripped. Use carefully") { @strip_first = false }
|
||||||
parser.on(long_flag = "--cookie COOKIE", description = "Cookie-header") { |cookie| @cookie = cookie }
|
parser.on(long_flag = "--cookie COOKIE", description = "Cookie-header") { |cookie| @cookie = cookie }
|
||||||
|
@ -70,7 +67,6 @@ module Muse::Dl
|
||||||
end
|
end
|
||||||
if File.exists? args[0]
|
if File.exists? args[0]
|
||||||
@input_list = args[0]
|
@input_list = args[0]
|
||||||
@input_pdf = nil
|
|
||||||
else
|
else
|
||||||
@url = args[0]
|
@url = args[0]
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue