mirror of https://github.com/captn3m0/muse-dl.git
Modular code in fetch to support both chapters and articles
This commit is contained in:
parent
f04e9b799e
commit
870ed3080d
|
@ -4,6 +4,7 @@ require "./issue.cr"
|
||||||
module Muse::Dl
|
module Muse::Dl
|
||||||
class Article
|
class Article
|
||||||
@id : String
|
@id : String
|
||||||
|
getter :id
|
||||||
|
|
||||||
def initialize(id : String)
|
def initialize(id : String)
|
||||||
@id = id
|
@id = id
|
||||||
|
|
50
src/fetch.cr
50
src/fetch.cr
|
@ -14,6 +14,10 @@ module Muse::Dl
|
||||||
"Connection" => "keep-alive",
|
"Connection" => "keep-alive",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def self.article_file_name(id : String, tmp_path : String)
|
||||||
|
"#{tmp_path}/article-#{id}.pdf"
|
||||||
|
end
|
||||||
|
|
||||||
def self.chapter_file_name(id : String, tmp_path : String)
|
def self.chapter_file_name(id : String, tmp_path : String)
|
||||||
"#{tmp_path}/chapter-#{id}.pdf"
|
"#{tmp_path}/chapter-#{id}.pdf"
|
||||||
end
|
end
|
||||||
|
@ -23,24 +27,20 @@ module Muse::Dl
|
||||||
File.delete(fns) if File.exists?(fns)
|
File.delete(fns) if File.exists?(fns)
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.save_chapter(tmp_path : String, chapter_id : String, chapter_title : String, cookie : String | Nil = nil, add_bookmark = true, strip_first_page = true)
|
def self.save_url(url : String, referer : String, file_name : String, tmp_path : String, cookie : String | Nil = nil, bookmark_title : String | Nil = nil, strip_first_page = true)
|
||||||
final_pdf_file = chapter_file_name chapter_id, tmp_path
|
tmp_pdf_file = "#{file_name}.tmp"
|
||||||
tmp_pdf_file = "#{final_pdf_file}.tmp"
|
if File.exists? file_name
|
||||||
|
puts "#{file_name} already downloaded"
|
||||||
if File.exists? final_pdf_file
|
|
||||||
puts "#{chapter_id} already downloaded"
|
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
|
|
||||||
# TODO: Remove this hardcoding, and make this more generic by generating it within the Book class
|
|
||||||
url = "https://muse.jhu.edu/chapter/#{chapter_id}/pdf"
|
|
||||||
uri = URI.parse(url)
|
uri = URI.parse(url)
|
||||||
http_client = HTTP::Client.new(uri)
|
http_client = HTTP::Client.new(uri)
|
||||||
# Raise a IO::TimeoutError after 60 seconds.
|
# Raise a IO::TimeoutError after 60 seconds.
|
||||||
http_client.read_timeout = DOWNLOAD_TIMEOUT_SECS
|
http_client.read_timeout = DOWNLOAD_TIMEOUT_SECS
|
||||||
|
|
||||||
headers = HEADERS.merge({
|
headers = HEADERS.merge({
|
||||||
"Referer" => "https://muse.jhu.edu/verify?url=%2Fchapter%2F#{chapter_id}%2Fpdf",
|
"Referer" => referer,
|
||||||
})
|
})
|
||||||
|
|
||||||
if cookie
|
if cookie
|
||||||
|
@ -52,7 +52,7 @@ module Muse::Dl
|
||||||
begin
|
begin
|
||||||
response = request.execute
|
response = request.execute
|
||||||
rescue ex : IO::TimeoutError
|
rescue ex : IO::TimeoutError
|
||||||
raise Muse::Dl::Errors::DownloadError.new("Error downloading chapter. Download took longer than #{DOWNLOAD_TIMEOUT_SECS} seconds.")
|
raise Muse::Dl::Errors::DownloadError.new("Error downloading #{url}. Download took longer than #{DOWNLOAD_TIMEOUT_SECS} seconds.")
|
||||||
end
|
end
|
||||||
|
|
||||||
# TODO: Add validation for the downloaded file (should be PDF)
|
# TODO: Add validation for the downloaded file (should be PDF)
|
||||||
|
@ -76,6 +76,7 @@ module Muse::Dl
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
File.open(tmp_pdf_file, "w") do |file|
|
File.open(tmp_pdf_file, "w") do |file|
|
||||||
file << response.body
|
file << response.body
|
||||||
if file.size == 0
|
if file.size == 0
|
||||||
|
@ -87,16 +88,39 @@ module Muse::Dl
|
||||||
|
|
||||||
pdftk.strip_first_page tmp_pdf_file if strip_first_page
|
pdftk.strip_first_page tmp_pdf_file if strip_first_page
|
||||||
|
|
||||||
if add_bookmark
|
if bookmark_title
|
||||||
# Run pdftk and add the bookmark to the file
|
# Run pdftk and add the bookmark to the file
|
||||||
pdftk.add_bookmark tmp_pdf_file, chapter_title.strip
|
pdftk.add_bookmark tmp_pdf_file, bookmark_title
|
||||||
end
|
end
|
||||||
|
|
||||||
# Now we can move the file to the proper PDF filename
|
# Now we can move the file to the proper PDF filename
|
||||||
File.rename tmp_pdf_file, final_pdf_file
|
File.rename tmp_pdf_file, file_name
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.save_chapter(tmp_path : String, chapter_id : String, chapter_title : String, cookie : String | Nil = nil, add_bookmark = true, strip_first_page = true)
|
||||||
|
final_pdf_file = chapter_file_name chapter_id, tmp_path
|
||||||
|
|
||||||
|
if File.exists? final_pdf_file
|
||||||
|
puts "#{chapter_id} already downloaded"
|
||||||
|
return
|
||||||
|
end
|
||||||
|
|
||||||
|
# TODO: Remove this hardcoding, and make this more generic by generating it within the Book class
|
||||||
|
url = "https://muse.jhu.edu/chapter/#{chapter_id}/pdf"
|
||||||
|
referer = "https://muse.jhu.edu/verify?url=%2Fchapter%2F#{chapter_id}%2Fpdf"
|
||||||
|
|
||||||
|
save_url(url, referer, final_pdf_file, tmp_path, cookie, chapter_title, strip_first_page)
|
||||||
|
|
||||||
puts "Downloaded #{chapter_id}"
|
puts "Downloaded #{chapter_id}"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def self.save_article(tmp_path : String, article_id : String, cookie : String | Nil = nil, article_title = nil, strip_first_page = true)
|
||||||
|
file_name = article_file_name article_id, tmp_path
|
||||||
|
url = "https://muse.jhu.edu/article/#{article_id}/pdf"
|
||||||
|
referer = "https://muse.jhu.edu/article/#{article_id}"
|
||||||
|
save_url(url, referer, file_name, tmp_path, cookie, article_title, strip_first_page)
|
||||||
|
end
|
||||||
|
|
||||||
def self.get_info(url : String)
|
def self.get_info(url : String)
|
||||||
match = /https:\/\/muse.jhu.edu\/(book|journal|issue|article)\/(\d+)/.match url
|
match = /https:\/\/muse.jhu.edu\/(book|journal|issue|article)\/(\d+)/.match url
|
||||||
if match
|
if match
|
||||||
|
|
|
@ -4,6 +4,7 @@ require "./fetch.cr"
|
||||||
require "./book.cr"
|
require "./book.cr"
|
||||||
require "./journal.cr"
|
require "./journal.cr"
|
||||||
require "./util.cr"
|
require "./util.cr"
|
||||||
|
require "file_utils"
|
||||||
|
|
||||||
module Muse::Dl
|
module Muse::Dl
|
||||||
VERSION = "1.1.2"
|
VERSION = "1.1.2"
|
||||||
|
@ -56,7 +57,20 @@ module Muse::Dl
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
elsif thing.is_a? Muse::Dl::Article
|
elsif thing.is_a? Muse::Dl::Article
|
||||||
puts(thing)
|
# No bookmarks are needed since this is just a single article PDF
|
||||||
|
begin
|
||||||
|
Fetch.save_article(parser.tmp, thing.id, parser.cookie, nil, parser.strip_first)
|
||||||
|
rescue e : Muse::Dl::Errors::MuseCorruptPDF
|
||||||
|
STDERR.puts "Got a 'Unable to construct chapter PDF' error from MUSE, skipping: #{url}"
|
||||||
|
return
|
||||||
|
end
|
||||||
|
|
||||||
|
# TODO: Move this code elsewhere
|
||||||
|
source = Fetch.article_file_name(thing.id, parser.tmp)
|
||||||
|
destination = "article-#{thing.id}.pdf"
|
||||||
|
# Needed because of https://github.com/crystal-lang/crystal/issues/7777
|
||||||
|
FileUtils.cp source, destination
|
||||||
|
FileUtils.rm source if parser.cleanup
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue