mirror of https://github.com/captn3m0/muse-dl
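Report pdftk and download errors. Add exponential backoff after download failures (the delay doubles on each failure and halves again after a successful download). Add a top-level rescue block so that a failure on one book skips that book instead of aborting the rest of the input list.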
parent
d52b06377d
commit
483f838d24
12
src/fetch.cr
12
src/fetch.cr
|
@ -43,7 +43,10 @@ module Muse::Dl
|
|||
|
||||
# TODO: Add validation for the downloaded file (should be PDF)
|
||||
Crest.get(url, max_redirects: 0, handle_errors: false, headers: headers) do |response|
|
||||
# puts response.headers["Content-Type"]
|
||||
if !response.success?
|
||||
raise Muse::Dl::Errors::DownloadError.new("Error downloading chapter. HTTP response code: #{response.status}")
|
||||
end
|
||||
|
||||
content_type = response.headers["Content-Type"]
|
||||
if content_type.is_a? String
|
||||
if /html/.match content_type
|
||||
|
@ -59,7 +62,12 @@ module Muse::Dl
|
|||
end
|
||||
end
|
||||
File.open(tmp_pdf_file, "w") do |file|
|
||||
IO.copy(response.body_io, file)
|
||||
response_str = response.body
|
||||
file << response_str
|
||||
if file.size == 0
|
||||
# puts response.headers
|
||||
raise Muse::Dl::Errors::DownloadError.new("Error: downloaded chapter file size is zero. Response size was #{response_str.bytesize}")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -65,14 +65,30 @@ module Muse::Dl
|
|||
def self.run(args : Array(String))
|
||||
parser = Parser.new(args)
|
||||
|
||||
delay_secs = 1
|
||||
input_list = parser.input_list
|
||||
if input_list
|
||||
File.each_line input_list do |url|
|
||||
# TODO: Change this to nil
|
||||
parser.reset_output_file
|
||||
parser.url = url.strip
|
||||
# Ask the download process to not quit the process, and return instead
|
||||
Main.dl parser
|
||||
begin
|
||||
# TODO: Change this to nil
|
||||
parser.reset_output_file
|
||||
parser.url = url.strip
|
||||
# Ask the download process to not quit the process, and return instead
|
||||
Main.dl parser
|
||||
if delay_secs >= 2
|
||||
delay_secs /= 2
|
||||
end
|
||||
rescue ex : Muse::Dl::Errors::DownloadError
|
||||
puts ex
|
||||
puts "Download error. Skipping book: #{url}. Waiting for #{delay_secs} seconds before continuing."
|
||||
# Sleep to prevent hammering the server.
|
||||
sleep(delay_secs)
|
||||
delay_secs *= 2
|
||||
rescue ex
|
||||
puts ex
|
||||
puts "Non-download error. Skipping book: #{url}."
|
||||
sleep(1)
|
||||
end
|
||||
end
|
||||
elsif parser.url
|
||||
Main.dl parser
|
||||
|
|
32
src/pdftk.cr
32
src/pdftk.cr
|
@ -28,14 +28,22 @@ module Muse::Dl
|
|||
def execute(args : Array(String))
|
||||
binary = @binary
|
||||
if binary
|
||||
Process.run(binary, args)
|
||||
status = Process.run(binary, args, output: STDOUT, error: STDERR)
|
||||
if !status.success?
|
||||
puts "pdftk command failed: #{binary} #{args.join(" ")}"
|
||||
end
|
||||
return status.success?
|
||||
end
|
||||
end
|
||||
|
||||
def strip_first_page(input_file : String)
|
||||
output_pdf = File.tempfile("muse-dl-temp", ".pdf")
|
||||
execute [input_file, "cat", "2-end", "output", output_pdf.path]
|
||||
File.rename output_pdf.path, input_file
|
||||
is_success = execute [input_file, "cat", "2-end", "output", output_pdf.path]
|
||||
if is_success
|
||||
File.rename output_pdf.path, input_file
|
||||
else
|
||||
raise Muse::Dl::Errors::PDFOperationError.new("Error stripping first page of chapter.")
|
||||
end
|
||||
end
|
||||
|
||||
def add_bookmark(input_file : String, title : String)
|
||||
|
@ -48,11 +56,15 @@ module Muse::Dl
|
|||
BookmarkPageNumber: 1
|
||||
END
|
||||
File.write(bookmark_text_file.path, bookmark_text)
|
||||
execute [input_file, "update_info", bookmark_text_file.path, "output", output_pdf.path]
|
||||
is_success = execute [input_file, "update_info", bookmark_text_file.path, "output", output_pdf.path]
|
||||
|
||||
# Cleanup
|
||||
bookmark_text_file.delete
|
||||
File.rename output_pdf.path, input_file
|
||||
if is_success
|
||||
File.rename output_pdf.path, input_file
|
||||
else
|
||||
raise Muse::Dl::Errors::PDFOperationError.new("Error adding bookmark metadata to chapter.")
|
||||
end
|
||||
end
|
||||
|
||||
def add_metadata(input_file : File, output_file : String, book : Book)
|
||||
|
@ -95,7 +107,10 @@ module Muse::Dl
|
|||
EOT
|
||||
|
||||
File.write(metadata_text_file.path, text)
|
||||
execute [input_file.path, "update_info_utf8", metadata_text_file.path, "output", output_file]
|
||||
is_success = execute [input_file.path, "update_info_utf8", metadata_text_file.path, "output", output_file]
|
||||
if !is_success
|
||||
raise Muse::Dl::Errors::PDFOperationError.new("Error adding metadata to book.")
|
||||
end
|
||||
metadata_text_file.delete
|
||||
end
|
||||
|
||||
|
@ -111,9 +126,12 @@ module Muse::Dl
|
|||
|
||||
chapter_files = chapter_ids.map { |id| Fetch.chapter_file_name(id, @tmp_file_path) }
|
||||
args = chapter_files + ["cat", "output", output_file.path]
|
||||
execute args
|
||||
is_success = execute args
|
||||
|
||||
# TODO: Validate final file here
|
||||
if !is_success
|
||||
raise Muse::Dl::Errors::PDFOperationError.new("Error stitching chapters together.")
|
||||
end
|
||||
|
||||
return output_file
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue