Adds fix for corrupt PDF error from MUSE

This commit is contained in:
Nemo 2020-04-02 03:09:32 +05:30
parent ae2a35bb7b
commit 82ce97cbc8
4 changed files with 35 additions and 1 deletions

12
spec/fetch_spec.cr Normal file
View File

@ -0,0 +1,12 @@
require "./spec_helper"
# require "errors/muse_corrupt_pdf.cr"
# Regression spec for the "Unable to construct chapter PDF" response.
# NOTE(review): save_chapter appears to issue a real HTTP request (Crest.get),
# so this spec presumably needs network access to MUSE — confirm.
describe Muse::Dl::Book do
  it "should notice the unable to construct chapter PDF error" do
    # Remove any leftover download from an earlier run before fetching again.
    # NOTE(review): presumably save_chapter writes to this path — confirm.
    stale_pdf = "/tmp/chapter-2379787.pdf"
    if File.exists?(stale_pdf)
      File.delete(stale_pdf)
    end

    # Fetching this chapter is expected to raise MuseCorruptPDF.
    expect_raises(Muse::Dl::Errors::MuseCorruptPDF) do
      Muse::Dl::Fetch.save_chapter("/tmp", "2379787", "NA")
    end
  end
end

View File

@ -0,0 +1,4 @@
# Exception types raised by muse-dl.
module Muse::Dl::Errors
  # Raised when a chapter download comes back as an HTML page containing
  # "Unable to construct chapter PDF" instead of the PDF itself.
  class MuseCorruptPDF < Exception; end
end

View File

@ -1,5 +1,6 @@
require "crest"
require "./errors/*"
require "myhtml"
module Muse::Dl
class Fetch
@ -42,6 +43,18 @@ module Muse::Dl
# TODO: Add validation for the downloaded file (should be PDF)
Crest.get(url, max_redirects: 0, handle_errors: false, headers: headers) do |response|
# puts response.headers["Content-Type"]
content_type = response.headers["Content-Type"]
if content_type.is_a? String
if /html/.match content_type
puts response
response.body_io.each_line do |line|
if /Unable to construct chapter PDF/.match line
raise Muse::Dl::Errors::MuseCorruptPDF.new
end
end
end
end
File.open(tmp_pdf_file, "w") do |file|
IO.copy(response.body_io, file)
end

View File

@ -33,7 +33,12 @@ module Muse::Dl
unless parser.input_pdf
# Save each chapter
thing.chapters.each do |chapter|
Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks)
begin
Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks)
rescue e : Muse::Dl::Errors::MuseCorruptPDF
STDERR.puts "Got a 'Unable to construct chapter PDF' error from MUSE, skipping: #{url}"
return
end
end
chapter_ids = thing.chapters.map { |c| c[0] }