mirror of https://github.com/captn3m0/muse-dl.git
Adds --clobber support
This commit is contained in:
parent
d3a603a209
commit
79b95e2a67
|
@ -21,7 +21,7 @@ module Muse::Dl
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.author(myhtml : Myhtml::Parser)
|
def self.author(myhtml : Myhtml::Parser)
|
||||||
myhtml.css("#book_about_info .author").map(&.inner_text).to_a[0].strip
|
myhtml.css("#book_about_info .author").map(&.inner_text).to_a[0].strip.gsub("<BR>", ", ").gsub("\n", " ")
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.date(myhtml : Myhtml::Parser)
|
def self.date(myhtml : Myhtml::Parser)
|
||||||
|
@ -33,7 +33,12 @@ module Muse::Dl
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.summary(myhtml : Myhtml::Parser)
|
def self.summary(myhtml : Myhtml::Parser)
|
||||||
myhtml.css("#book_about_info .card_summary").map(&.inner_text).to_a[0].strip
|
begin
|
||||||
|
return myhtml.css("#book_about_info .card_summary").map(&.inner_text).to_a[0].strip
|
||||||
|
rescue e : Exception
|
||||||
|
STDERR.puts "Could not fetch summary"
|
||||||
|
return "NA"
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.summary_html(myhtml : Myhtml::Parser)
|
def self.summary_html(myhtml : Myhtml::Parser)
|
||||||
|
|
|
@ -17,17 +17,30 @@ module Muse::Dl
|
||||||
# Will have no effect if parser has a custom title
|
# Will have no effect if parser has a custom title
|
||||||
parser.output = Util.slug_filename "#{thing.title}.pdf"
|
parser.output = Util.slug_filename "#{thing.title}.pdf"
|
||||||
|
|
||||||
# Save each chapter
|
# If file exists and we can't clobber
|
||||||
thing.chapters.each do |chapter|
|
if File.exists?(parser.output) && parser.clobber == false
|
||||||
Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.bookmarks)
|
STDERR.puts "File already exists, not doing anything"
|
||||||
|
Process.exit(1)
|
||||||
end
|
end
|
||||||
chapter_ids = thing.chapters.map { |c| c[0] }
|
temp_stitched_file = nil
|
||||||
|
|
||||||
# Stitch the PDFs together
|
|
||||||
pdf_builder = Pdftk.new(parser.tmp)
|
pdf_builder = Pdftk.new(parser.tmp)
|
||||||
temp_stitched_file = pdf_builder.stitch chapter_ids
|
|
||||||
pdf_builder.add_metadata(temp_stitched_file, parser.output, thing)
|
unless parser.input_pdf
|
||||||
temp_stitched_file.delete
|
# Save each chapter
|
||||||
|
thing.chapters.each do |chapter|
|
||||||
|
Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.bookmarks)
|
||||||
|
end
|
||||||
|
chapter_ids = thing.chapters.map { |c| c[0] }
|
||||||
|
|
||||||
|
# Stitch the PDFs together
|
||||||
|
temp_stitched_file = pdf_builder.stitch chapter_ids
|
||||||
|
pdf_builder.add_metadata(temp_stitched_file, parser.output, thing)
|
||||||
|
else
|
||||||
|
x = parser.input_pdf
|
||||||
|
pdf_builder.add_metadata(File.open(x), parser.output, thing) if x
|
||||||
|
end
|
||||||
|
|
||||||
|
temp_stitched_file.delete if temp_stitched_file
|
||||||
puts "Saved final output to #{parser.output}"
|
puts "Saved final output to #{parser.output}"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -8,10 +8,12 @@ module Muse::Dl
|
||||||
@cleanup = true
|
@cleanup = true
|
||||||
@output = DEFAULT_FILE_NAME
|
@output = DEFAULT_FILE_NAME
|
||||||
@url = "INVALID_URL"
|
@url = "INVALID_URL"
|
||||||
|
@input_pdf : String | Nil
|
||||||
|
@clobber = false
|
||||||
|
|
||||||
DEFAULT_FILE_NAME = "tempfilename.pdf"
|
DEFAULT_FILE_NAME = "tempfilename.pdf"
|
||||||
|
|
||||||
getter :bookmarks, :tmp, :cleanup, :output, :url
|
getter :bookmarks, :tmp, :cleanup, :output, :url, :input_pdf, :clobber
|
||||||
|
|
||||||
# Update the output filename unless we have a custom one passed
|
# Update the output filename unless we have a custom one passed
|
||||||
def output=(output_file : String)
|
def output=(output_file : String)
|
||||||
|
@ -29,6 +31,7 @@ module Muse::Dl
|
||||||
|
|
||||||
def initialize(arg : Array(String) = [] of String)
|
def initialize(arg : Array(String) = [] of String)
|
||||||
@tmp = Dir.tempdir
|
@tmp = Dir.tempdir
|
||||||
|
@input_pdf = nil
|
||||||
|
|
||||||
parser = OptionParser.new
|
parser = OptionParser.new
|
||||||
parser.banner = "Usage: muse-dl [--flags] URL"
|
parser.banner = "Usage: muse-dl [--flags] URL"
|
||||||
|
@ -36,6 +39,8 @@ module Muse::Dl
|
||||||
parser.on(long_flag = "--tmp-dir PATH", description = "Temporary Directory to use") { |path| @tmp = path }
|
parser.on(long_flag = "--tmp-dir PATH", description = "Temporary Directory to use") { |path| @tmp = path }
|
||||||
parser.on(long_flag = "--output FILE", description = "Output Filename") { |file| @output = file }
|
parser.on(long_flag = "--output FILE", description = "Output Filename") { |file| @output = file }
|
||||||
parser.on(long_flag = "--no-bookmarks", description = "Don't add bookmarks in the PDF") { @bookmarks = false }
|
parser.on(long_flag = "--no-bookmarks", description = "Don't add bookmarks in the PDF") { @bookmarks = false }
|
||||||
|
parser.on(long_flag = "--input-pdf INPUT", description = "Input Stitched PDF. Will not download anything") { |input| @input_pdf = input }
|
||||||
|
parser.on(long_flag = "--clobber", description = "Overwrite the output file, if it already exists") { @clobber = true }
|
||||||
parser.on("-h", "--help", "Show this help") { puts parser }
|
parser.on("-h", "--help", "Show this help") { puts parser }
|
||||||
|
|
||||||
parser.unknown_args do |args|
|
parser.unknown_args do |args|
|
||||||
|
|
30
src/pdftk.cr
30
src/pdftk.cr
|
@ -54,7 +54,15 @@ module Muse::Dl
|
||||||
def add_metadata(input_file : File, output_file : String, book : Book)
|
def add_metadata(input_file : File, output_file : String, book : Book)
|
||||||
# First we have to dump the current metadata
|
# First we have to dump the current metadata
|
||||||
metadata_text_file = File.tempfile("muse-dl-metadata-tmp", ".txt")
|
metadata_text_file = File.tempfile("muse-dl-metadata-tmp", ".txt")
|
||||||
# TODO: Add version info in the Creator/Producer
|
keywords = "Publisher:#{book.publisher}, Published:#{book.date}"
|
||||||
|
|
||||||
|
# Known Info keys, if they are present
|
||||||
|
["ISBN", "Related ISBN", "DOI", "Language", "OCLC"].each do |label|
|
||||||
|
if book.info.has_key? label
|
||||||
|
keywords += ", #{label}:#{book.info[label]}"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
text = <<-EOT
|
text = <<-EOT
|
||||||
InfoBegin
|
InfoBegin
|
||||||
InfoKey: Creator
|
InfoKey: Creator
|
||||||
|
@ -67,27 +75,21 @@ module Muse::Dl
|
||||||
InfoValue: #{book.title}
|
InfoValue: #{book.title}
|
||||||
InfoBegin
|
InfoBegin
|
||||||
InfoKey: Keywords
|
InfoKey: Keywords
|
||||||
InfoValue: Publisher:#{book.publisher}, Published:#{book.date}
|
InfoValue: #{keywords}
|
||||||
InfoBegin
|
InfoBegin
|
||||||
InfoKey: Author
|
InfoKey: Author
|
||||||
InfoValue: #{book.author}
|
InfoValue: #{book.author}
|
||||||
InfoBegin
|
InfoBegin
|
||||||
InfoKey: Subject
|
InfoKey: Subject
|
||||||
InfoValue: #{book.summary.gsub(/\n\s+/, " ")}
|
InfoValue: #{book.summary.gsub(/\n\s+/, " ")}
|
||||||
|
InfoBegin
|
||||||
|
InfoKey: ModDate
|
||||||
|
InfoValue:
|
||||||
|
InfoBegin
|
||||||
|
InfoKey: CreationDate
|
||||||
|
InfoValue:
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# Known Info keys, if they are present
|
|
||||||
|
|
||||||
["ISBN", "Related ISBN", "DOI", "Language", "OCLC"].each do |label|
|
|
||||||
if book.info.has_key? label
|
|
||||||
text += <<-EOT
|
|
||||||
InfoBegin
|
|
||||||
InfoKey: #{label}
|
|
||||||
InfoValue: #{book.info[label]}
|
|
||||||
EOT
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
File.write(metadata_text_file.path, text)
|
File.write(metadata_text_file.path, text)
|
||||||
execute [input_file.path, "update_info_utf8", metadata_text_file.path, "output", output_file]
|
execute [input_file.path, "update_info_utf8", metadata_text_file.path, "output", output_file]
|
||||||
metadata_text_file.delete
|
metadata_text_file.delete
|
||||||
|
|
Loading…
Reference in New Issue