mirror of https://github.com/captn3m0/muse-dl.git
Adds fix for corrupt PDF error from MUSE
This commit is contained in:
parent
ae2a35bb7b
commit
82ce97cbc8
|
@ -0,0 +1,12 @@
|
|||
require "./spec_helper"
|
||||
# require "errors/muse_corrupt_pdf.cr"
|
||||
|
||||
# Spec for the "corrupt PDF" failure mode of chapter downloads.
#
# The code under test is `Muse::Dl::Fetch.save_chapter`, so the spec is
# described against `Fetch` (it was previously labelled `Book`, which made
# failures show up under the wrong class in the spec output).
describe Muse::Dl::Fetch do
  it "should notice the unable to construct chapter PDF error" do
    # Use the platform temp directory instead of a hard-coded "/tmp" so the
    # download directory and the cleanup path below always agree.
    tmp = Dir.tempdir
    f = File.join(tmp, "chapter-2379787.pdf")

    # Remove any leftover file from an earlier run; presumably an existing
    # download would otherwise mask the error — verify against save_chapter.
    File.delete(f) if File.exists? f

    # Chapter 2379787 is expected to make save_chapter raise MuseCorruptPDF.
    # NOTE(review): this appears to hit the live MUSE site — confirm.
    expect_raises Muse::Dl::Errors::MuseCorruptPDF do
      Muse::Dl::Fetch.save_chapter(tmp, "2379787", "NA")
    end
  end
end
|
|
@ -0,0 +1,4 @@
|
|||
module Muse::Dl::Errors
  # Raised when a chapter download returns an HTML page containing
  # "Unable to construct chapter PDF" instead of the PDF itself.
  #
  # Callers rescue this to skip the affected download rather than abort.
  class MuseCorruptPDF < Exception
    # Human-readable default so that an unhandled or logged instance is
    # self-explanatory even when raised with a bare `.new`.
    DEFAULT_MESSAGE = "MUSE was unable to construct the chapter PDF"

    # *message* overrides the default text; *cause* optionally chains the
    # underlying exception. Both are optional, so `MuseCorruptPDF.new`
    # keeps working exactly as before.
    def initialize(message : String? = DEFAULT_MESSAGE, cause : Exception? = nil)
      super(message, cause)
    end
  end
end
|
13
src/fetch.cr
13
src/fetch.cr
|
@ -1,5 +1,6 @@
|
|||
require "crest"
|
||||
require "./errors/*"
|
||||
require "myhtml"
|
||||
|
||||
module Muse::Dl
|
||||
class Fetch
|
||||
|
@ -42,6 +43,18 @@ module Muse::Dl
|
|||
|
||||
# TODO: Add validation for the downloaded file (should be PDF)
|
||||
Crest.get(url, max_redirects: 0, handle_errors: false, headers: headers) do |response|
|
||||
# puts response.headers["Content-Type"]
|
||||
content_type = response.headers["Content-Type"]
|
||||
if content_type.is_a? String
|
||||
if /html/.match content_type
|
||||
puts response
|
||||
response.body_io.each_line do |line|
|
||||
if /Unable to construct chapter PDF/.match line
|
||||
raise Muse::Dl::Errors::MuseCorruptPDF.new
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
File.open(tmp_pdf_file, "w") do |file|
|
||||
IO.copy(response.body_io, file)
|
||||
end
|
||||
|
|
|
@ -33,7 +33,12 @@ module Muse::Dl
|
|||
unless parser.input_pdf
|
||||
# Save each chapter
|
||||
thing.chapters.each do |chapter|
|
||||
Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks)
|
||||
begin
|
||||
Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks)
|
||||
rescue e : Muse::Dl::Errors::MuseCorruptPDF
|
||||
STDERR.puts "Got a 'Unable to construct chapter PDF' error from MUSE, skipping: #{url}"
|
||||
return
|
||||
end
|
||||
end
|
||||
chapter_ids = thing.chapters.map { |c| c[0] }
|
||||
|
||||
|
|
Loading…
Reference in New Issue