Adds back metadata support for PDF

2024-07-26 20:42:56 +00:00 · 2021-05-28 21:29:02 +05:30 · 2021-05-28 21:29:02 +05:30 · d047844ad0
commit d047844ad0
parent 5c0c16a2ee
2 changed files with 29 additions and 4 deletions
--- a/src/pystitcher/stitcher.py
+++ b/src/pystitcher/stitcher.py
@ -3,6 +3,7 @@ import markdown
 from .bookmark import Bookmark
 import html5lib
 from PyPDF2 import PdfFileWriter, PdfFileReader
+from pystitcher import __version__
 import tempfile
 import logging

@ -30,15 +31,36 @@ class Stitcher:
        for e in document.iter():
            self.iter(e)

+    """
+    Get the number of pages in a PDF file
+    """
    def _get_pdf_number_of_pages(self, filename):
        assert os.path.isfile(filename) and os.access(filename, os.R_OK), \
                "File {} doesn't exist or isn't readable".format(filename)
        pdf_reader = PdfFileReader(open(filename, "rb"))
        return pdf_reader.numPages

+    """
+    Return an attribute with a default value of None
+    """
    def _getAttribute(self, key):
        return self.attributes.get(key, [None])[0]

+    def _getMetadata(self):
+        meta = {'/Producer': "pystitcher/%s" % __version__, '/Creator': "pystitcher/%s" % __version__}
+        if (self._getAttribute('author')):
+            meta["/Author"] = self._getAttribute('author')
+        if (self._getAttribute('title')):
+            meta["/Title"] = self._getAttribute('title')
+        elif self.title:
+            meta["/Title"] = self.title
+        if (self._getAttribute('subject')):
+            meta["/Subject"] = self._getAttribute('subject')
+        if (self._getAttribute('keywords')):
+            meta["/Keywords"] = self._getAttribute('keywords')
+
+        return meta
+
    def iter(self, element):
        tag = element.tag
        b = None
@ -57,7 +79,6 @@ class Stitcher:
            file = element.attrib.get('href')
            b = Bookmark(self.currentPage, element.text, self.currentLevel+1)
            self.files.append((file, self.currentPage))
-            # _logger.info("File: %s starts at %s", file, self.currentLevel)
            self.currentPage += self._get_pdf_number_of_pages(file)
        if b:
            self.bookmarks.append(b)
@ -143,6 +164,7 @@ class Stitcher:
            # Else, push to top
            else:
                stack.append((b, pdfOutput.addBookmark(b.title, b.page - 1)))
+        pdfOutput.addMetadata(self._getMetadata())
        pdfOutput.write(open(outputFilename, 'wb'))

    """
@ -170,14 +192,13 @@ class Stitcher:
        tempPdf = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
        self._merge(tempPdf)
        # Only read the additional bookmarks if we're not removing them
-        if (not self._removeExistingBookmarks())
+        if (not self._removeExistingBookmarks()):
            self._add_existing_bookmarks()
        self._update_metadata(tempPdf.name, outputFilename)

        if (cleanup):
            _logger.info("Deleting temporary files")
-            os.remove(tempMetadataFile.name)
            os.remove(tempPdf.name)
        else:
            # Why print? Because this is not logging, this is output
-            print("Temporary files saved as ", tempPdf.name, tempMetadataFile.name)
+            print("Temporary PDF file saved as ", tempPdf.name)
--- a/tests/book-keep.md
+++ b/tests/book-keep.md
@ -1,4 +1,8 @@
 existing_bookmarks: keep
+title: Super Jelly Book
+author: Wiki, the Cat
+subject: A book about adventures of Wiki, the cat.
+keywords: wiki,potato,jelly
 # Super Potato Book

 # Volume 1