From 376adf28cb315590971e485941d40cdd9c7fe892 Mon Sep 17 00:00:00 2001
From: Nemo
Date: Sun, 29 Mar 2020 03:52:57 +0530
Subject: [PATCH] Fixes some redirect bullshit

---
Review notes (commentary only: text between the three-dash line and the
first "diff --git" line is neither commit message nor diff payload).

* Layout: this patch arrived collapsed onto two physical lines. Line breaks
  and indentation below are reconstructed (2-space indent assumed; the
  nesting depth of the second src/parser.cr hunk is a guess). The
  reconstruction agrees with every hunk header and with the diffstat, but
  whitespace should be checked against the repository before applying.
* The payload is Crystal (.cr), not Ruby.
* src/fetch.cr
  - Adds the USER_AGENT and HEADERS constants. HEADERS carries a literal
    "Cookie" session value although the TODO directly above it still asks
    whether cookies should be supported. NOTE(review): confirm that
    committing a session identifier is intended.
  - Adds Fetch.save_chapter(tmp_path, chapter_id, add_bookmark = true).
    It requests https://muse.jhu.edu/chapter/#{chapter_id} through Crest.get
    with HEADERS plus a "Referer" header, max_redirects: 0 and
    handle_errors: false, then copies response.body_io into
    "#{tmp_path}/chapter-#{chapter_id}.pdf".
    add_bookmark is not read anywhere in the body.
    Both rescue clauses print and re-raise: the one inside the Crest.get
    block prints e.message, the outer one prints "FICK". NOTE(review): the
    outer message looks like leftover debugging output.
  - Fetch.get_info: the return annotation changes from
    (Muse::Dl::Book | Muse::Dl::Journal) to Muse::Dl::Thing | Nil, the
    response becomes Crest.get(url).to_s, and InvalidLink is now raised
    as an instance (.new). One line between the two hunks is not part of
    this patch, so the "case match[1]" branches are only partly visible;
    no else branch is shown. NOTE(review): presumably that is why Nil was
    added to the return type -- confirm.
* src/muse-dl.cr
  - Enables require "./journal.cr" (previously commented out).
  - Main.run keeps the result of Fetch.get_info and, when it is a
    Muse::Dl::Book, calls Fetch.save_chapter(parser.tmp, chapter[0]) for
    each entry of thing.chapters. Nothing is done for other results.
* src/parser.cr
  - @url changes from a "String | Nil" declaration to the initial value
    "INVALID_URL", and is assigned "" immediately before
    Errors::MissingLink is raised.

 src/fetch.cr | 45 +++++++++++++++++++++++++++++++++++++++++----
 src/muse-dl.cr | 11 ++++++++---
 src/parser.cr | 3 ++-
 3 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/src/fetch.cr b/src/fetch.cr
index d1ea54b..7ddc636 100644
--- a/src/fetch.cr
+++ b/src/fetch.cr
@@ -3,11 +3,48 @@ require "./errors/*"
 
 module Muse::Dl
   class Fetch
-    def self.get_info(url : String) : (Muse::Dl::Book | Muse::Dl::Journal)
+    USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
+    # TODO: Add support for cookies?
+    # "Cookie" => "session=124.123.104.8.1585388207021325",
+    HEADERS = {
+      "User-Agent" => USER_AGENT,
+      "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+      "Accept-Language" => "en-US,en;q=0.5",
+      "DNT" => "1",
+      "Cookie" => "session=124.123.104.8.1585420925750331; session=25719682.5a1ef8cb90ec8",
+      "Connection" => "keep-alive",
+      "Upgrade-Insecure-Requests" => "1",
+      "Cache-Control" => "max-age=0",
+    }
+
+    def self.save_chapter(tmp_path : String, chapter_id : String, add_bookmark = true)
+      url = "https://muse.jhu.edu/chapter/#{chapter_id}"
+      headers = HEADERS.merge({
+        "Referer" => "https://muse.jhu.edu/verify?url=%2Fchapter%2F#{chapter_id}%2Fpdf",
+      })
+
+      begin
+        Crest.get(url, max_redirects: 0, handle_errors: false, headers: headers) do |response|
+          File.open("#{tmp_path}/chapter-#{chapter_id}.pdf", "w") do |file|
+            IO.copy(response.body_io, file)
+          end
+        rescue e : Exception
+          puts e.message
+          raise e
+          # We catch a temporary redirect
+          # https://github.com/mamantoha/crest/blob/29a690726902c71884f9c80f0f9565256e74b7fd/src/crest/exceptions.cr#L20-L28
+        end
+      rescue e : Exception
+        puts "FICK"
+        raise e
+      end
+    end
+
+    def self.get_info(url : String) : Muse::Dl::Thing | Nil
       match = /https:\/\/muse.jhu.edu\/(book|journal)\/(\d+)/.match url
       if match
         begin
-          response = Crest.get url
+          response = Crest.get(url).to_s
           case match[1]
           when "book"
             return Muse::Dl::Book.new response
@@ -15,10 +52,10 @@ module Muse::Dl
             return Muse::Dl::Journal.new response
           end
         rescue ex : Crest::NotFound
-          raise Muse::Dl::Errors::InvalidLink
+          raise Muse::Dl::Errors::InvalidLink.new
         end
       else
-        raise Muse::Dl::Errors::InvalidLink
+        raise Muse::Dl::Errors::InvalidLink.new
       end
     end
   end
diff --git a/src/muse-dl.cr b/src/muse-dl.cr
index 468cc3b..a3f3807 100644
--- a/src/muse-dl.cr
+++ b/src/muse-dl.cr
@@ -2,8 +2,7 @@ require "./parser.cr"
 require "./pdftk.cr"
 require "./fetch.cr"
 require "./book.cr"
-
-# require "./journal.cr"
+require "./journal.cr"
 
 # TODO: Write documentation for `Muse::Dl`
 module Muse::Dl
@@ -13,7 +12,13 @@ module Muse::Dl
   class Main
     def self.run(args : Array(String))
       parser = Parser.new(args)
-      Fetch.get_info(parser.url)
+      thing = Fetch.get_info(parser.url)
+
+      if thing.is_a? Muse::Dl::Book
+        thing.chapters.each do |chapter|
+          Fetch.save_chapter(parser.tmp, chapter[0])
+        end
+      end
     end
   end
 end
diff --git a/src/parser.cr b/src/parser.cr
index 766b7a0..fec1fec 100644
--- a/src/parser.cr
+++ b/src/parser.cr
@@ -6,7 +6,7 @@ module Muse::Dl
     @tmp : String
     @cleanup : Bool
     @output : String
-    @url : String | Nil
+    @url = "INVALID_URL"
 
     getter :bookmarks, :tmp, :cleanup, :output, :url
 
@@ -27,6 +27,7 @@ module Muse::Dl
         begin
           @url = arg[-1]
         rescue e : Exception
+          @url = ""
           raise Errors::MissingLink.new
         end
       end