diff --git a/spec/fetch_spec.cr b/spec/fetch_spec.cr
new file mode 100644
index 0000000..6e97dfd
--- /dev/null
+++ b/spec/fetch_spec.cr
@@ -0,0 +1,14 @@
+require "./spec_helper"
+
+describe Muse::Dl::Fetch do
+  # NOTE(review): this calls save_chapter with a real chapter id, so it
+  # presumably needs network access to MUSE -- confirm before running in CI.
+  it "should notice the unable to construct chapter PDF error" do
+    f = "/tmp/chapter-2379787.pdf"
+    # Remove any earlier download first (assumed to be save_chapter's output path).
+    File.delete(f) if File.exists? f
+    expect_raises Muse::Dl::Errors::MuseCorruptPDF do
+      Muse::Dl::Fetch.save_chapter("/tmp", "2379787", "NA")
+    end
+  end
+end
diff --git a/src/errors/muse_corrupt_pdf.cr b/src/errors/muse_corrupt_pdf.cr
new file mode 100644
index 0000000..1c534ea
--- /dev/null
+++ b/src/errors/muse_corrupt_pdf.cr
@@ -0,0 +1,6 @@
+module Muse::Dl::Errors
+  # Raised by Fetch.save_chapter when MUSE responds with an HTML page
+  # containing "Unable to construct chapter PDF" instead of the chapter PDF.
+  class MuseCorruptPDF < Exception
+  end
+end
diff --git a/src/fetch.cr b/src/fetch.cr
index 240ab8b..524afef 100644
--- a/src/fetch.cr
+++ b/src/fetch.cr
@@ -1,5 +1,6 @@
 require "crest"
 require "./errors/*"
+require "myhtml"
 
 module Muse::Dl
   class Fetch
@@ -42,6 +43,19 @@ module Muse::Dl
       # TODO: Add validation for the downloaded file (should be PDF)
 
       Crest.get(url, max_redirects: 0, handle_errors: false, headers: headers) do |response|
+        # MUSE reports a failed PDF build as an HTML page rather than a PDF,
+        # so scan HTML responses for the error text before saving anything.
+        # `[]?` avoids a KeyError when the Content-Type header is absent.
+        content_type = response.headers["Content-Type"]?
+        if content_type.is_a?(String) && content_type.includes?("html")
+          # NOTE(review): each_line reads from body_io, so for an HTML response
+          # without the error text the copy below has nothing left to write.
+          response.body_io.each_line do |line|
+            if line.includes?("Unable to construct chapter PDF")
+              raise Muse::Dl::Errors::MuseCorruptPDF.new
+            end
+          end
+        end
         File.open(tmp_pdf_file, "w") do |file|
           IO.copy(response.body_io, file)
         end
diff --git a/src/muse-dl.cr b/src/muse-dl.cr
index b5b9519..496cbc6 100644
--- a/src/muse-dl.cr
+++ b/src/muse-dl.cr
@@ -33,7 +33,14 @@ module Muse::Dl
       unless parser.input_pdf
         # Save each chapter
         thing.chapters.each do |chapter|
-          Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks)
+          begin
+            Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks)
+          rescue e : Muse::Dl::Errors::MuseCorruptPDF
+            # One unbuildable chapter abandons this whole item: `return` leaves
+            # the enclosing method, not just this iteration.
+            STDERR.puts "Got a 'Unable to construct chapter PDF' error from MUSE, skipping: #{url}"
+            return
+          end
         end
 
         chapter_ids = thing.chapters.map { |c| c[0] }