You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
muse-dl/src/muse-dl.cr

164 lines
5.5 KiB

require "./parser.cr"
require "./pdftk.cr"
require "./fetch.cr"
require "./book.cr"
require "./journal.cr"
require "./util.cr"
require "file_utils"
# Command-line driver for muse-dl. `Main.run` parses the arguments and
# `Main.dl` downloads the book, article, issue or journal a URL points at.
module Muse::Dl
  VERSION = "1.3.1"

  class Main
    # Downloads whatever `parser.url` resolves to.
    #
    # The item is looked up with `Fetch.get_info` and then handed to the
    # helper matching its type (book, article, issue or journal). Nothing is
    # downloaded when the lookup yields nothing, or when the item is Open
    # Access and `parser.skip_oa` is set.
    #
    # Skips are reported on STDERR and make the method return early instead of
    # raising, so callers can keep processing further URLs.
    def self.dl(parser : Parser)
      url = parser.url
      puts "Downloading #{url}"
      thing = Fetch.get_info(url) if url
      return unless thing

      if thing.open_access && parser.skip_oa
        STDERR.puts "Skipping #{url}, available under Open Access"
        return
      end

      # Checked in the same order as the original is_a? chain.
      case thing
      when Muse::Dl::Book
        download_book(parser, thing, url)
      when Muse::Dl::Article
        download_article(parser, thing, url)
      when Muse::Dl::Issue
        download_issue(parser, thing, url)
      when Muse::Dl::Journal
        download_journal(parser, thing)
      end
    end

    # Parses *args* and downloads either every URL listed in the parser's
    # input list (one URL per line) or the single URL given on the command
    # line.
    #
    # In list mode a failing URL does not abort the run: the error and its
    # backtrace are printed, the process sleeps for `delay_secs` seconds and
    # then continues with the next line. The delay starts at 1 second, is
    # doubled after each failure while it is below 256, and is halved after
    # each success while it is at least 2.
    def self.run(args : Array(String))
      parser = Parser.new(args)
      delay_secs = 1
      input_list = parser.input_list

      if input_list
        File.each_line input_list do |line|
          url = line.strip
          # Blank lines (for example a trailing newline) are not URLs
          next if url.empty?

          begin
            # TODO: Change this to nil
            parser.reset_output_file
            parser.url = url
            # Ask the download process to not quit the process, and return instead
            Main.dl parser
            # Floor division keeps the delay an integer; `/` would turn it
            # into a Float64.
            delay_secs //= 2 if delay_secs >= 2
          rescue ex
            puts ex.message
            puts ex.backtrace.join("\n ")
            puts "Error. Skipping book: #{line}. Waiting for #{delay_secs} seconds before continuing."
            sleep(delay_secs.seconds)
            delay_secs *= 2 if delay_secs < 256
          end
        end
      elsif parser.url
        Main.dl parser
      end
    end

    # Saves every chapter of *book*, stitches them into one PDF, writes the
    # result with metadata to `parser.output` and removes the temporary
    # stitched file. Chapter files are removed as well when `parser.cleanup`
    # is set.
    private def self.download_book(parser : Parser, book : Muse::Dl::Book, url)
      unless book.formats.includes? :pdf
        STDERR.puts "Book not available in PDF format, skipping: #{url}"
        return
      end

      # Will have no effect if parser has a custom title
      parser.output = Util.slug_filename "#{book.title}.pdf"
      return if skip_existing?(parser, url)

      # Built before any download starts, as in the original flow.
      pdf_builder = Pdftk.new(parser.tmp)

      # Save each chapter
      book.chapters.each do |chapter|
        begin
          Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks, parser.strip_first)
        rescue Muse::Dl::Errors::MuseCorruptPDF
          STDERR.puts "Got a 'Unable to construct chapter PDF' error from MUSE, skipping: #{url}"
          return
        end
      end

      chapter_ids = book.chapters.map { |c| c[0] }

      # Stitch the PDFs together
      temp_stitched_file = pdf_builder.stitch chapter_ids
      pdf_builder.add_metadata(temp_stitched_file, parser.output, book)
      temp_stitched_file.delete if temp_stitched_file

      puts "--dont-strip-first-page was on. Please validate PDF file for any errors." unless parser.strip_first
      puts "DL: #{url}. Saved final output to #{parser.output}"

      # Cleanup the chapter files
      if parser.cleanup
        book.chapters.each do |c|
          Fetch.cleanup(parser.tmp, c[0])
        end
      end
    end

    # Saves a single article and copies it to `article-<id>.pdf` in the
    # current directory. The downloaded source file is removed when
    # `parser.cleanup` is set.
    private def self.download_article(parser : Parser, article : Muse::Dl::Article, url)
      # No bookmarks are needed since this is just a single article PDF
      begin
        Fetch.save_article(parser.tmp, article.id, parser.cookie, nil, parser.strip_first)
      rescue Muse::Dl::Errors::MuseCorruptPDF
        STDERR.puts "Got a 'Unable to construct chapter PDF' error from MUSE, skipping: #{url}"
        return
      end

      # TODO: Move this code elsewhere
      source = Fetch.article_file_name(article.id, parser.tmp)
      destination = "article-#{article.id}.pdf"
      # Needed because of https://github.com/crystal-lang/crystal/issues/7777
      FileUtils.cp source, destination
      FileUtils.rm source if parser.cleanup
    end

    # Saves every article of *issue*, stitches them into one PDF and writes
    # the result with metadata to `parser.output`. Article files are removed
    # when `parser.cleanup` is set.
    private def self.download_issue(parser : Parser, issue : Muse::Dl::Issue, url)
      # NOTE(review): unlike the book path this goes through
      # `force_set_output`; presumably it replaces any output name that is
      # already set - confirm against Parser.
      parser.force_set_output Util.slug_filename "#{issue.journal_title} - #{issue.title}.pdf"
      return if skip_existing?(parser, url)

      # Built before any download starts, as in the original flow.
      pdf_builder = Pdftk.new(parser.tmp)

      issue.articles.each do |article|
        begin
          Fetch.save_article(parser.tmp, article.id, parser.cookie, article.title, parser.strip_first)
        rescue Muse::Dl::Errors::MuseCorruptPDF
          STDERR.puts "Got a 'Unable to construct chapter PDF' error from MUSE, skipping: #{url}"
          return
        end
      end

      article_ids = issue.articles.map { |a| a.id }

      # Stitch the PDFs together
      temp_stitched_file = pdf_builder.stitch_articles article_ids
      pdf_builder.add_metadata(temp_stitched_file, parser.output, issue)
      # NOTE(review): the book path deletes the stitched temp file here, but
      # the equivalent call is disabled for issues - confirm whether that is
      # intentional before re-enabling it.
      # temp_stitched_file.delete if temp_stitched_file

      puts "--dont-strip-first-page was on. Please validate PDF file for any errors." unless parser.strip_first
      puts "DL: #{url}. Saved final output to #{parser.output}"

      # Cleanup the issue files
      if parser.cleanup
        issue.articles.each do |a|
          Fetch.cleanup_articles(parser.tmp, a.id)
        end
      end
    end

    # Downloads every issue of *journal* by pointing `parser.url` at the issue
    # and re-entering `dl`. An exception raised for one issue is printed and
    # the loop moves on to the next issue.
    private def self.download_journal(parser : Parser, journal : Muse::Dl::Journal)
      journal.issues.each do |issue|
        begin
          # Update the issue
          issue.parse
          parser.url = issue.url
          Main.dl parser
        rescue e
          puts e.message
          puts "Faced an exception with previous issue, continuing"
        end
      end
    end

    # Returns true, after reporting the skip on STDERR, when `parser.output`
    # already exists and `parser.clobber` is false.
    private def self.skip_existing?(parser : Parser, url)
      # If file exists and we can't clobber
      return false unless File.exists?(parser.output) && parser.clobber == false

      STDERR.puts "Skipping #{url}, File already exists: #{parser.output}"
      true
    end
  end
end
# Program entry point: hand the process's command-line arguments to the CLI driver.
Muse::Dl::Main.run(ARGV)