Adds back metadata support for PDF
This commit is contained in:
parent
5c0c16a2ee
commit
d047844ad0
|
@ -3,6 +3,7 @@ import markdown
|
||||||
from .bookmark import Bookmark
|
from .bookmark import Bookmark
|
||||||
import html5lib
|
import html5lib
|
||||||
from PyPDF2 import PdfFileWriter, PdfFileReader
|
from PyPDF2 import PdfFileWriter, PdfFileReader
|
||||||
|
from pystitcher import __version__
|
||||||
import tempfile
|
import tempfile
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
@ -30,15 +31,36 @@ class Stitcher:
|
||||||
for e in document.iter():
|
for e in document.iter():
|
||||||
self.iter(e)
|
self.iter(e)
|
||||||
|
|
||||||
|
"""
|
||||||
|
Get the number of pages in a PDF file
|
||||||
|
"""
|
||||||
def _get_pdf_number_of_pages(self, filename):
|
def _get_pdf_number_of_pages(self, filename):
|
||||||
assert os.path.isfile(filename) and os.access(filename, os.R_OK), \
|
assert os.path.isfile(filename) and os.access(filename, os.R_OK), \
|
||||||
"File {} doesn't exist or isn't readable".format(filename)
|
"File {} doesn't exist or isn't readable".format(filename)
|
||||||
pdf_reader = PdfFileReader(open(filename, "rb"))
|
pdf_reader = PdfFileReader(open(filename, "rb"))
|
||||||
return pdf_reader.numPages
|
return pdf_reader.numPages
|
||||||
|
|
||||||
|
"""
|
||||||
|
Return an attribute with a default value of None
|
||||||
|
"""
|
||||||
def _getAttribute(self, key):
|
def _getAttribute(self, key):
|
||||||
return self.attributes.get(key, [None])[0]
|
return self.attributes.get(key, [None])[0]
|
||||||
|
|
||||||
|
def _getMetadata(self):
|
||||||
|
meta = {'/Producer': "pystitcher/%s" % __version__, '/Creator': "pystitcher/%s" % __version__}
|
||||||
|
if (self._getAttribute('author')):
|
||||||
|
meta["/Author"] = self._getAttribute('author')
|
||||||
|
if (self._getAttribute('title')):
|
||||||
|
meta["/Title"] = self._getAttribute('title')
|
||||||
|
elif self.title:
|
||||||
|
meta["/Title"] = self.title
|
||||||
|
if (self._getAttribute('subject')):
|
||||||
|
meta["/Subject"] = self._getAttribute('subject')
|
||||||
|
if (self._getAttribute('keywords')):
|
||||||
|
meta["/Keywords"] = self._getAttribute('keywords')
|
||||||
|
|
||||||
|
return meta
|
||||||
|
|
||||||
def iter(self, element):
|
def iter(self, element):
|
||||||
tag = element.tag
|
tag = element.tag
|
||||||
b = None
|
b = None
|
||||||
|
@ -57,7 +79,6 @@ class Stitcher:
|
||||||
file = element.attrib.get('href')
|
file = element.attrib.get('href')
|
||||||
b = Bookmark(self.currentPage, element.text, self.currentLevel+1)
|
b = Bookmark(self.currentPage, element.text, self.currentLevel+1)
|
||||||
self.files.append((file, self.currentPage))
|
self.files.append((file, self.currentPage))
|
||||||
# _logger.info("File: %s starts at %s", file, self.currentLevel)
|
|
||||||
self.currentPage += self._get_pdf_number_of_pages(file)
|
self.currentPage += self._get_pdf_number_of_pages(file)
|
||||||
if b:
|
if b:
|
||||||
self.bookmarks.append(b)
|
self.bookmarks.append(b)
|
||||||
|
@ -143,6 +164,7 @@ class Stitcher:
|
||||||
# Else, push to top
|
# Else, push to top
|
||||||
else:
|
else:
|
||||||
stack.append((b, pdfOutput.addBookmark(b.title, b.page - 1)))
|
stack.append((b, pdfOutput.addBookmark(b.title, b.page - 1)))
|
||||||
|
pdfOutput.addMetadata(self._getMetadata())
|
||||||
pdfOutput.write(open(outputFilename, 'wb'))
|
pdfOutput.write(open(outputFilename, 'wb'))
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
@ -170,14 +192,13 @@ class Stitcher:
|
||||||
tempPdf = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
|
tempPdf = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
|
||||||
self._merge(tempPdf)
|
self._merge(tempPdf)
|
||||||
# Only read the additional bookmarks if we're not removing them
|
# Only read the additional bookmarks if we're not removing them
|
||||||
if (not self._removeExistingBookmarks())
|
if (not self._removeExistingBookmarks()):
|
||||||
self._add_existing_bookmarks()
|
self._add_existing_bookmarks()
|
||||||
self._update_metadata(tempPdf.name, outputFilename)
|
self._update_metadata(tempPdf.name, outputFilename)
|
||||||
|
|
||||||
if (cleanup):
|
if (cleanup):
|
||||||
_logger.info("Deleting temporary files")
|
_logger.info("Deleting temporary files")
|
||||||
os.remove(tempMetadataFile.name)
|
|
||||||
os.remove(tempPdf.name)
|
os.remove(tempPdf.name)
|
||||||
else:
|
else:
|
||||||
# Why print? Because this is not logging, this is output
|
# Why print? Because this is not logging, this is output
|
||||||
print("Temporary files saved as ", tempPdf.name, tempMetadataFile.name)
|
print("Temporary PDF file saved as ", tempPdf.name)
|
||||||
|
|
|
@ -1,4 +1,8 @@
|
||||||
existing_bookmarks: keep
|
existing_bookmarks: keep
|
||||||
|
title: Super Jelly Book
|
||||||
|
author: Wiki, the Cat
|
||||||
|
subject: A book about adventures of Wiki, the cat.
|
||||||
|
keywords: wiki,potato,jelly
|
||||||
# Super Potato Book
|
# Super Potato Book
|
||||||
|
|
||||||
# Volume 1
|
# Volume 1
|
||||||
|
|
Loading…
Reference in New Issue