From f04e9b799ed0e7b85866830397cde0f87ef3d125 Mon Sep 17 00:00:00 2001 From: Nemo Date: Tue, 30 Jun 2020 14:18:16 +0530 Subject: [PATCH] Removes input_pdf and initial work on article download --- shard.lock | 12 ++++++------ src/article.cr | 6 ++++++ src/fetch.cr | 6 +++++- src/muse-dl.cr | 31 ++++++++++++++----------------- src/parser.cr | 6 +----- 5 files changed, 32 insertions(+), 29 deletions(-) diff --git a/shard.lock b/shard.lock index 135a1dc..80ce5b8 100644 --- a/shard.lock +++ b/shard.lock @@ -1,18 +1,18 @@ -version: 1.0 +version: 2.0 shards: crest: - github: mamantoha/crest + git: https://github.com/mamantoha/crest.git version: 0.25.1 http-client-digest_auth: - github: mamantoha/http-client-digest_auth + git: https://github.com/mamantoha/http-client-digest_auth.git version: 0.4.0 myhtml: - github: kostya/myhtml + git: https://github.com/kostya/myhtml.git version: 1.5.1 webmock: - github: manastech/webmock.cr - commit: bb3eab30f6c7d1fdc0a7ff14cd136d68e860d1a7 + git: https://github.com/manastech/webmock.cr.git + version: 0.13.0+git.commit.bb3eab30f6c7d1fdc0a7ff14cd136d68e860d1a7 diff --git a/src/article.cr b/src/article.cr index 1b94b30..8da80b6 100644 --- a/src/article.cr +++ b/src/article.cr @@ -3,5 +3,11 @@ require "./issue.cr" module Muse::Dl class Article + @id : String + + def initialize(id : String) + @id = id + @url = "https://muse.jhu.edu/article/#{id}" + end end end diff --git a/src/fetch.cr b/src/fetch.cr index 3f581d8..acb96ab 100644 --- a/src/fetch.cr +++ b/src/fetch.cr @@ -98,7 +98,7 @@ module Muse::Dl end def self.get_info(url : String) - match = /https:\/\/muse.jhu.edu\/(book|journal)\/(\d+)/.match url + match = /https:\/\/muse.jhu.edu\/(book|journal|issue|article)\/(\d+)/.match url if match begin response = Crest.get(url).to_s @@ -107,6 +107,10 @@ module Muse::Dl return Muse::Dl::Book.new response when "journal" return Muse::Dl::Journal.new response + when "issue" + return Muse::Dl::Issue.new response + when "article" + return Muse::Dl::Article.new match[2] end rescue ex : Crest::NotFound raise Muse::Dl::Errors::InvalidLink.new("Error - could not download url: #{url}") diff --git a/src/muse-dl.cr b/src/muse-dl.cr index 28368fc..a0534d8 100644 --- a/src/muse-dl.cr +++ b/src/muse-dl.cr @@ -30,25 +30,20 @@ module Muse::Dl temp_stitched_file = nil pdf_builder = Pdftk.new(parser.tmp) - unless parser.input_pdf - # Save each chapter - thing.chapters.each do |chapter| - begin - Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks, parser.strip_first) - rescue e : Muse::Dl::Errors::MuseCorruptPDF - STDERR.puts "Got a 'Unable to construct chapter PDF' error from MUSE, skipping: #{url}" - return - end + # Save each chapter + thing.chapters.each do |chapter| + begin + Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.cookie, parser.bookmarks, parser.strip_first) + rescue e : Muse::Dl::Errors::MuseCorruptPDF + STDERR.puts "Got a 'Unable to construct chapter PDF' error from MUSE, skipping: #{url}" + return end - chapter_ids = thing.chapters.map { |c| c[0] } - - # Stitch the PDFs together - temp_stitched_file = pdf_builder.stitch chapter_ids - pdf_builder.add_metadata(temp_stitched_file, parser.output, thing) - else - x = parser.input_pdf - pdf_builder.add_metadata(File.open(x), parser.output, thing) if x end + chapter_ids = thing.chapters.map { |c| c[0] } + + # Stitch the PDFs together + temp_stitched_file = pdf_builder.stitch chapter_ids + pdf_builder.add_metadata(temp_stitched_file, parser.output, thing) temp_stitched_file.delete if temp_stitched_file puts "--dont-strip-first-page was on. Please validate PDF file for any errors." if parser.strip_first @@ -60,6 +55,8 @@ module Muse::Dl Fetch.cleanup(parser.tmp, c[0]) end end + elsif thing.is_a? Muse::Dl::Article + puts(thing) end end diff --git a/src/parser.cr b/src/parser.cr index 658b23b..ebfd2d7 100644 --- a/src/parser.cr +++ b/src/parser.cr @@ -10,7 +10,6 @@ module Muse::Dl @strip_first = true @output = DEFAULT_FILE_NAME @url : String | Nil - @input_pdf : String | Nil @clobber = false @input_list : String | Nil @cookie : String | Nil @@ -18,7 +17,7 @@ module Muse::Dl DEFAULT_FILE_NAME = "tempfilename.pdf" - getter :bookmarks, :tmp, :cleanup, :output, :url, :input_pdf, :clobber, :input_list, :cookie, :strip_first + getter :bookmarks, :tmp, :cleanup, :output, :url, :clobber, :input_list, :cookie, :strip_first setter :url # Update the output filename unless we have a custom one passed @@ -41,7 +40,6 @@ module Muse::Dl def initialize(arg : Array(String) = [] of String) @tmp = Dir.tempdir - @input_pdf = nil parser = OptionParser.new parser.banner = <<-EOT @@ -56,7 +54,6 @@ module Muse::Dl parser.on(long_flag = "--tmp-dir PATH", description = "Temporary Directory to use") { |path| @tmp = path } parser.on(long_flag = "--output FILE", description = "Output Filename") { |file| @output = file } parser.on(long_flag = "--no-bookmarks", description = "Don't add bookmarks in the PDF") { @bookmarks = false } - parser.on(long_flag = "--input-pdf INPUT", description = "Input Stitched PDF. Will not download anything") { |input| @input_pdf = input } parser.on(long_flag = "--clobber", description = "Overwrite the output file, if it already exists. Not compatible with input-pdf") { @clobber = true } parser.on(long_flag = "--dont-strip-first-page", description = "Disables first page from being stripped. Use carefully") { @strip_first = false } parser.on(long_flag = "--cookie COOKIE", description = "Cookie-header") { |cookie| @cookie = cookie } @@ -70,7 +67,6 @@ module Muse::Dl end if File.exists? args[0] @input_list = args[0] - @input_pdf = nil else @url = args[0] end