2020-03-28 19:29:47 +00:00
|
|
|
require "process"
|
2020-03-29 12:21:01 +00:00
|
|
|
require "file"
|
|
|
|
require "./fetch"
|
|
|
|
require "./errors/*"
|
2020-03-29 13:40:50 +00:00
|
|
|
require "dir"
|
2020-03-28 19:29:47 +00:00
|
|
|
|
|
|
|
module Muse::Dl
|
|
|
|
class Pdftk
|
|
|
|
# Name of the executable that `initialize` looks up via `Process.find_executable`.
PDFTK_BINARY_NAME = "pdftk"

# Path of the pdftk executable, or nil when none has been resolved.
@binary : String?

# Directory handed to `Fetch` when resolving chapter/article file names.
@tmp_file_path : String
|
2020-03-28 19:29:47 +00:00
|
|
|
|
2020-03-31 20:34:15 +00:00
|
|
|
# Returns true when a pdftk executable path is stored in `@binary`.
def ready?
  !@binary.nil?
end
|
2020-03-28 19:29:47 +00:00
|
|
|
|
2020-03-29 13:40:50 +00:00
|
|
|
# Creates a pdftk wrapper.
#
# *tmp_file_path* is the directory later passed to `Fetch` when resolving
# chapter/article file names; it defaults to `Dir.tempdir`.
#
# If no executable named `PDFTK_BINARY_NAME` can be found, a message is
# printed and the process exits with status 1.
def initialize(@tmp_file_path : String = Dir.tempdir)
  @binary = Process.find_executable(Pdftk::PDFTK_BINARY_NAME)
  return if @binary

  puts "Could not find pdftk binary, exiting"
  Process.exit(1)
end
|
2020-03-29 12:21:01 +00:00
|
|
|
|
|
|
|
# Runs pdftk with *args*, forwarding its output and error streams to
# STDOUT/STDERR.
#
# Returns true when the command succeeded and false when it failed (after
# printing the failed command line). Returns nil when no binary is set.
def execute(args : Array(String))
  pdftk = @binary
  return unless pdftk

  succeeded = Process.run(pdftk, args, output: STDOUT, error: STDERR).success?
  puts "pdftk command failed: #{pdftk} #{args.join(" ")}" unless succeeded
  succeeded
end
|
|
|
|
|
|
|
|
# Removes the first page of the PDF at *input_file*, replacing it in place.
#
# pdftk writes pages "2-end" to a temporary PDF, which is then renamed over
# *input_file*. If pdftk fails, the temporary PDF is removed, a hint is
# printed and the process exits with status 1.
def strip_first_page(input_file : String)
  output_pdf = File.tempfile("muse-dl-temp", ".pdf")
  # Only the path is needed (pdftk writes the file itself), so release our
  # own handle instead of keeping one descriptor open per processed chapter.
  output_pdf.close

  if execute [input_file, "cat", "2-end", "output", output_pdf.path]
    File.rename output_pdf.path, input_file
  else
    # Do not leave the temporary PDF behind when bailing out.
    File.delete(output_pdf.path) if File.exists?(output_pdf.path)
    puts "Error stripping first page of chapter. Maybe try using --dont-strip-first-page"
    exit 1
  end
end
|
|
|
|
|
|
|
|
# Adds a single level-1 bookmark named *title*, pointing at page 1, to the
# PDF at *input_file*, replacing the file in place.
#
# Raises `Muse::Dl::Errors::PDFOperationError` when pdftk fails. Both
# temporary files are cleaned up on failure.
def add_bookmark(input_file : String, title : String)
  output_pdf = File.tempfile("muse-dl-temp", ".pdf")
  bookmark_text_file = File.tempfile("muse-dl-chapter-tmp", ".txt")
  bookmark_text = <<-END
    BookmarkBegin
    BookmarkTitle: #{title}
    BookmarkLevel: 1
    BookmarkPageNumber: 1
    END
  File.write(bookmark_text_file.path, bookmark_text)

  is_success = begin
    # The info file is written as UTF-8, so use the UTF-8 variant of
    # update_info (as write_metadata does); plain update_info expects
    # non-ASCII characters to be XML-entity encoded.
    # NOTE(review): assumes *title* is raw UTF-8 text, not entity-escaped - confirm.
    execute [input_file, "update_info_utf8", bookmark_text_file.path, "output", output_pdf.path]
  ensure
    # Cleanup, even if running pdftk raised.
    bookmark_text_file.delete
  end

  if is_success
    File.rename output_pdf.path, input_file
  else
    # The temporary output is useless after a failure; don't leak it.
    File.delete(output_pdf.path) if File.exists?(output_pdf.path)
    raise Muse::Dl::Errors::PDFOperationError.new("Error adding bookmark metadata to chapter.")
  end
end
|
|
|
|
|
2020-03-29 13:21:07 +00:00
|
|
|
# Writes *book*'s metadata into a copy of *input_file* at *output_file*.
#
# Keywords always carry publisher and publication date, followed by any of
# the known info labels present in `book.info`. Newlines followed by
# whitespace in the summary are collapsed to a single space.
def add_metadata(input_file : File, output_file : String, book : Book)
  keyword_parts = ["Publisher:#{book.publisher}, Published:#{book.date}"]

  # Known Info keys, if they are present
  ["ISBN", "Related ISBN", "DOI", "Language", "OCLC"].each do |label|
    next unless book.info.has_key? label
    keyword_parts << ", #{label}:#{book.info[label]}"
  end

  info_text = gen_metadata(book.title, keyword_parts.join, book.summary.gsub(/\n\s+/, " "), book.author)
  write_metadata(input_file, output_file, info_text)
end
|
|
|
|
|
|
|
|
# Builds the info text handed to pdftk by `write_metadata`.
#
# Creator, Producer, ModDate and CreationDate are emitted with empty values;
# Title, Keywords and Subject carry the given arguments. An Author record is
# appended only when *author* is not nil. Every record except a trailing
# Author record ends with a newline.
def gen_metadata(title : String, keywords : String, subject : String, author : String | Nil = nil)
  String.build do |io|
    io << "InfoBegin\nInfoKey: Creator\nInfoValue:\n"
    io << "InfoBegin\nInfoKey: Producer\nInfoValue:\n"
    io << "InfoBegin\nInfoKey: Title\nInfoValue: " << title << '\n'
    io << "InfoBegin\nInfoKey: Keywords\nInfoValue: " << keywords << '\n'
    io << "InfoBegin\nInfoKey: Subject\nInfoValue: " << subject << '\n'
    io << "InfoBegin\nInfoKey: ModDate\nInfoValue:\n"
    io << "InfoBegin\nInfoKey: CreationDate\nInfoValue:\n"

    if author
      io << "InfoBegin\nInfoKey: Author\nInfoValue: " << author
    end
  end
end
|
|
|
|
|
|
|
|
# Applies the pdftk info *text* to *input_file*, writing the result to
# *output_file*.
#
# *text* is written to a temporary file that is passed to pdftk's
# `update_info_utf8` operation. The temporary file is removed whether or not
# pdftk succeeds.
#
# Raises `Muse::Dl::Errors::PDFOperationError` when pdftk fails.
def write_metadata(input_file : File, output_file : String, text)
  metadata_text_file = File.tempfile("muse-dl-metadata-tmp", ".txt")
  begin
    File.write(metadata_text_file.path, text)
    is_success = execute [input_file.path, "update_info_utf8", metadata_text_file.path, "output", output_file]
    raise Muse::Dl::Errors::PDFOperationError.new("Error adding metadata to book.") unless is_success
  ensure
    # Previously skipped on failure because the raise came first.
    metadata_text_file.delete
  end
end
|
|
|
|
|
2020-06-30 12:06:44 +00:00
|
|
|
# Writes *issue*'s metadata into a copy of *input_file* at *output_file*.
#
# Keywords carry journal title, date, volume and number, followed by any of
# the known info labels present in `issue.info`. A missing summary or title
# is written as "NA". No author is passed on.
def add_metadata(input_file : File, output_file : String, issue : Issue)
  # No temporary file is created here: write_metadata manages its own, and
  # the one this method used to create was never used or deleted.
  keywords = "Journal:#{issue.journal_title}, Published:#{issue.date},Volume:#{issue.volume},Number:#{issue.number}"
  ["ISSN", "Print ISSN", "DOI", "Language", "Open Access"].each do |label|
    if issue.info.has_key? label
      keywords += ", #{label}:#{issue.info[label]}"
    end
  end

  # TODO: Move this to Issue class
  summary = issue.summary.try(&.gsub(/\n\s+/, " ")) || "NA"
  title = issue.title || "NA"

  # TODO: Add support for all authors in the PDF
  metadata = gen_metadata(title, keywords, summary)
  write_metadata(input_file, output_file, metadata)
end
|
|
|
|
|
2020-03-29 13:21:07 +00:00
|
|
|
# Concatenates the chapter PDFs for *chapter_ids*, in order, into a new
# temporary PDF and returns that `File`.
#
# Raises `Muse::Dl::Errors::MissingFile` if a chapter file does not exist,
# `Muse::Dl::Errors::CorruptFile` if one is empty, and
# `Muse::Dl::Errors::PDFOperationError` if pdftk fails.
def stitch(chapter_ids : Array(String))
  # Resolve each path once instead of three times per chapter.
  # NOTE(review): assumes Fetch.chapter_file_name only builds a path - confirm.
  chapter_files = chapter_ids.map { |id| Fetch.chapter_file_name(id, @tmp_file_path) }

  # Do some sanity checks on each Chapter PDF
  chapter_files.each do |path|
    raise Muse::Dl::Errors::MissingFile.new unless File.exists? path
    raise Muse::Dl::Errors::CorruptFile.new unless File.size(path) > 0
  end

  # Create the output only after validation so a failed check leaves no
  # stray temporary file behind.
  output_file = File.tempfile("muse-dl-stitched-tmp", ".pdf")

  # Now let's stitch them together
  args = chapter_files + ["cat", "output", output_file.path]

  # TODO: Validate final file here
  unless execute args
    output_file.delete
    raise Muse::Dl::Errors::PDFOperationError.new("Error stitching chapters together.")
  end

  output_file
end
|
2020-06-30 12:06:44 +00:00
|
|
|
|
|
|
|
# TODO: Merge with stitch
|
|
|
|
# Concatenates the article PDFs for *article_ids*, in order, into a new
# temporary PDF and returns that `File`.
#
# Raises `Muse::Dl::Errors::MissingFile` if an article file does not exist,
# `Muse::Dl::Errors::CorruptFile` if one is empty, and
# `Muse::Dl::Errors::PDFOperationError` if pdftk fails.
def stitch_articles(article_ids : Array(String))
  # Resolve each path once instead of three times per article.
  # NOTE(review): assumes Fetch.article_file_name only builds a path - confirm.
  article_files = article_ids.map { |id| Fetch.article_file_name(id, @tmp_file_path) }

  # Do some sanity checks on each article PDF
  article_files.each do |path|
    raise Muse::Dl::Errors::MissingFile.new unless File.exists? path
    raise Muse::Dl::Errors::CorruptFile.new unless File.size(path) > 0
  end

  # Create the output only after validation so a failed check leaves no
  # stray temporary file behind.
  output_file = File.tempfile("muse-dl-stitched-tmp", ".pdf")

  # Now let's stitch them together
  args = article_files + ["cat", "output", output_file.path]

  # TODO: Validate final file here
  # No separate dump of args on failure: execute already prints the full
  # failed command line.
  unless execute args
    output_file.delete
    raise Muse::Dl::Errors::PDFOperationError.new("Error stitching articles together.")
  end

  output_file
end
|
2020-03-28 19:29:47 +00:00
|
|
|
end
|
|
|
|
end
|