Adds --clobber support

This commit is contained in:
Nemo 2020-03-30 00:08:10 +05:30
parent d3a603a209
commit 79b95e2a67
4 changed files with 51 additions and 26 deletions

View File

@@ -21,7 +21,7 @@ module Muse::Dl
end end
def self.author(myhtml : Myhtml::Parser) def self.author(myhtml : Myhtml::Parser)
myhtml.css("#book_about_info .author").map(&.inner_text).to_a[0].strip myhtml.css("#book_about_info .author").map(&.inner_text).to_a[0].strip.gsub("<BR>", ", ").gsub("\n", " ")
end end
def self.date(myhtml : Myhtml::Parser) def self.date(myhtml : Myhtml::Parser)
@@ -33,7 +33,12 @@ module Muse::Dl
end end
def self.summary(myhtml : Myhtml::Parser) def self.summary(myhtml : Myhtml::Parser)
myhtml.css("#book_about_info .card_summary").map(&.inner_text).to_a[0].strip begin
return myhtml.css("#book_about_info .card_summary").map(&.inner_text).to_a[0].strip
rescue e : Exception
STDERR.puts "Could not fetch summary"
return "NA"
end
end end
def self.summary_html(myhtml : Myhtml::Parser) def self.summary_html(myhtml : Myhtml::Parser)

View File

@@ -17,17 +17,30 @@ module Muse::Dl
# Will have no effect if parser has a custom title # Will have no effect if parser has a custom title
parser.output = Util.slug_filename "#{thing.title}.pdf" parser.output = Util.slug_filename "#{thing.title}.pdf"
# Save each chapter # If file exists and we can't clobber
thing.chapters.each do |chapter| if File.exists?(parser.output) && parser.clobber == false
Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.bookmarks) STDERR.puts "File already exists, not doing anything"
Process.exit(1)
end end
chapter_ids = thing.chapters.map { |c| c[0] } temp_stitched_file = nil
# Stitch the PDFs together
pdf_builder = Pdftk.new(parser.tmp) pdf_builder = Pdftk.new(parser.tmp)
temp_stitched_file = pdf_builder.stitch chapter_ids
pdf_builder.add_metadata(temp_stitched_file, parser.output, thing) unless parser.input_pdf
temp_stitched_file.delete # Save each chapter
thing.chapters.each do |chapter|
Fetch.save_chapter(parser.tmp, chapter[0], chapter[1], parser.bookmarks)
end
chapter_ids = thing.chapters.map { |c| c[0] }
# Stitch the PDFs together
temp_stitched_file = pdf_builder.stitch chapter_ids
pdf_builder.add_metadata(temp_stitched_file, parser.output, thing)
else
x = parser.input_pdf
pdf_builder.add_metadata(File.open(x), parser.output, thing) if x
end
temp_stitched_file.delete if temp_stitched_file
puts "Saved final output to #{parser.output}" puts "Saved final output to #{parser.output}"
end end
end end

View File

@@ -8,10 +8,12 @@ module Muse::Dl
@cleanup = true @cleanup = true
@output = DEFAULT_FILE_NAME @output = DEFAULT_FILE_NAME
@url = "INVALID_URL" @url = "INVALID_URL"
@input_pdf : String | Nil
@clobber = false
DEFAULT_FILE_NAME = "tempfilename.pdf" DEFAULT_FILE_NAME = "tempfilename.pdf"
getter :bookmarks, :tmp, :cleanup, :output, :url getter :bookmarks, :tmp, :cleanup, :output, :url, :input_pdf, :clobber
# Update the output filename unless we have a custom one passed # Update the output filename unless we have a custom one passed
def output=(output_file : String) def output=(output_file : String)
@@ -29,6 +31,7 @@ module Muse::Dl
def initialize(arg : Array(String) = [] of String) def initialize(arg : Array(String) = [] of String)
@tmp = Dir.tempdir @tmp = Dir.tempdir
@input_pdf = nil
parser = OptionParser.new parser = OptionParser.new
parser.banner = "Usage: muse-dl [--flags] URL" parser.banner = "Usage: muse-dl [--flags] URL"
@@ -36,6 +39,8 @@ module Muse::Dl
parser.on(long_flag = "--tmp-dir PATH", description = "Temporary Directory to use") { |path| @tmp = path } parser.on(long_flag = "--tmp-dir PATH", description = "Temporary Directory to use") { |path| @tmp = path }
parser.on(long_flag = "--output FILE", description = "Output Filename") { |file| @output = file } parser.on(long_flag = "--output FILE", description = "Output Filename") { |file| @output = file }
parser.on(long_flag = "--no-bookmarks", description = "Don't add bookmarks in the PDF") { @bookmarks = false } parser.on(long_flag = "--no-bookmarks", description = "Don't add bookmarks in the PDF") { @bookmarks = false }
parser.on(long_flag = "--input-pdf INPUT", description = "Input Stitched PDF. Will not download anything") { |input| @input_pdf = input }
parser.on(long_flag = "--clobber", description = "Overwrite the output file, if it already exists") { @clobber = true }
parser.on("-h", "--help", "Show this help") { puts parser } parser.on("-h", "--help", "Show this help") { puts parser }
parser.unknown_args do |args| parser.unknown_args do |args|

View File

@@ -54,7 +54,15 @@ module Muse::Dl
def add_metadata(input_file : File, output_file : String, book : Book) def add_metadata(input_file : File, output_file : String, book : Book)
# First we have to dump the current metadata # First we have to dump the current metadata
metadata_text_file = File.tempfile("muse-dl-metadata-tmp", ".txt") metadata_text_file = File.tempfile("muse-dl-metadata-tmp", ".txt")
# TODO: Add version info in the Creator/Producer keywords = "Publisher:#{book.publisher}, Published:#{book.date}"
# Known Info keys, if they are present
["ISBN", "Related ISBN", "DOI", "Language", "OCLC"].each do |label|
if book.info.has_key? label
keywords += ", #{label}:#{book.info[label]}"
end
end
text = <<-EOT text = <<-EOT
InfoBegin InfoBegin
InfoKey: Creator InfoKey: Creator
@@ -67,27 +75,21 @@ module Muse::Dl
InfoValue: #{book.title} InfoValue: #{book.title}
InfoBegin InfoBegin
InfoKey: Keywords InfoKey: Keywords
InfoValue: Publisher:#{book.publisher}, Published:#{book.date} InfoValue: #{keywords}
InfoBegin InfoBegin
InfoKey: Author InfoKey: Author
InfoValue: #{book.author} InfoValue: #{book.author}
InfoBegin InfoBegin
InfoKey: Subject InfoKey: Subject
InfoValue: #{book.summary.gsub(/\n\s+/, " ")} InfoValue: #{book.summary.gsub(/\n\s+/, " ")}
InfoBegin
InfoKey: ModDate
InfoValue:
InfoBegin
InfoKey: CreationDate
InfoValue:
EOT EOT
# Known Info keys, if they are present
["ISBN", "Related ISBN", "DOI", "Language", "OCLC"].each do |label|
if book.info.has_key? label
text += <<-EOT
InfoBegin
InfoKey: #{label}
InfoValue: #{book.info[label]}
EOT
end
end
File.write(metadata_text_file.path, text) File.write(metadata_text_file.path, text)
execute [input_file.path, "update_info_utf8", metadata_text_file.path, "output", output_file] execute [input_file.path, "update_info_utf8", metadata_text_file.path, "output", output_file]
metadata_text_file.delete metadata_text_file.delete