Remove old metadata text file code

This commit is contained in:
Nemo 2021-05-28 21:15:51 +05:30
parent c206324026
commit 5c0c16a2ee
1 changed file with 40 additions and 41 deletions

View File

@@ -3,7 +3,6 @@ import markdown
from .bookmark import Bookmark from .bookmark import Bookmark
import html5lib import html5lib
from PyPDF2 import PdfFileWriter, PdfFileReader from PyPDF2 import PdfFileWriter, PdfFileReader
import subprocess
import tempfile import tempfile
import logging import logging
@@ -63,13 +62,6 @@ class Stitcher:
if b: if b:
self.bookmarks.append(b) self.bookmarks.append(b)
def _add_bookmark(self, targetFileHandle, page, title, level):
targetFileHandle.write("BookmarkBegin\n")
targetFileHandle.write("BookmarkTitle: " + title + "\n")
targetFileHandle.write("BookmarkLevel: " + str(level) + "\n")
targetFileHandle.write("BookmarkPageNumber: " + str(page) + "\n")
targetFileHandle.write("BookmarkZoom: FitHeight\n")
def _existingBookmarkConfig(self): def _existingBookmarkConfig(self):
return self._getAttribute('existing_bookmarks') return self._getAttribute('existing_bookmarks')
@@ -79,39 +71,32 @@ class Stitcher:
def _flattenBookmarks(self): def _flattenBookmarks(self):
return (self._existingBookmarkConfig() == 'flatten') return (self._existingBookmarkConfig() == 'flatten')
def _generate_metadata(self, filename): """
with open(filename, 'w') as target: Adds the existing bookmarks into the
if (self.title): self.bookmarks list
target.write("InfoBegin\n") """
target.write("InfoKey: Title\n") def _add_existing_bookmarks(self):
target.write("InfoValue: " + self.title + "\n") self.bookmarks.sort()
self.bookmarks.sort() bookmarks = self.bookmarks.copy()
bookmarks = self.bookmarks.copy() if (self._removeExistingBookmarks() != True):
for b in self.oldBookmarks:
outer_level = self._get_level_from_page_number(b.page+1)
if (self._flattenBookmarks()):
increment = 2
else:
increment = b.level
level = outer_level + increment - 1
bookmarks.append(Bookmark(b.page+1, b.title, level))
if (self._removeExistingBookmarks() != True): bookmarks.sort()
for b in self.oldBookmarks: self.bookmarks = bookmarks
# _logger.info("Checking for %s(%s)", b.title, b.page+1)
outer_level = self._get_level_from_page_number(b.page+1)
# _logger.info("Got Level: %s", outer_level)
if (self._flattenBookmarks()):
increment = 2
else:
increment = b.level
level = outer_level + increment - 1
bookmarks.append(Bookmark(b.page+1, b.title, level))
bookmarks.sort()
self.bookmarks = bookmarks
# self._print_bookmarks()
for b in bookmarks:
self._add_bookmark(target, b.page, b.title, b.level)
def _print_bookmarks(self):
for b in self.bookmarks:
print((" " *( b.level-1)) + b.title + "("+str(b.page)+")")
"""
Gets the last bookmark level at a given page number
on the combined PDF
"""
def _get_level_from_page_number(self, page): def _get_level_from_page_number(self, page):
previousBookmarkLevel = self.bookmarks[0].level previousBookmarkLevel = self.bookmarks[0].level
for b in self.bookmarks: for b in self.bookmarks:
@@ -122,6 +107,10 @@ class Stitcher:
previousBookmarkLevel = b.level previousBookmarkLevel = b.level
return previousBookmarkLevel return previousBookmarkLevel
"""
Recursive method to read the old bookmarks (which are nested)
and push them to self.oldBookmarks
"""
def _iterate_old_bookmarks(self, pdf, startPage, bookmarks, level = 1): def _iterate_old_bookmarks(self, pdf, startPage, bookmarks, level = 1):
if (isinstance(bookmarks, list)): if (isinstance(bookmarks, list)):
for inner_bookmark in bookmarks: for inner_bookmark in bookmarks:
@@ -135,6 +124,7 @@ class Stitcher:
""" """
Insert the bookmarks into the PDF file Insert the bookmarks into the PDF file
Ref: https://stackoverflow.com/a/18867646 Ref: https://stackoverflow.com/a/18867646
# TODO: Interleave this into the merge method somehow
""" """
def _update_metadata(self, old_filename, outputFilename): def _update_metadata(self, old_filename, outputFilename):
stack = [] stack = []
@@ -155,11 +145,17 @@ class Stitcher:
stack.append((b, pdfOutput.addBookmark(b.title, b.page - 1))) stack.append((b, pdfOutput.addBookmark(b.title, b.page - 1)))
pdfOutput.write(open(outputFilename, 'wb')) pdfOutput.write(open(outputFilename, 'wb'))
"""
Merge the PDF files together in order
and iterate through the old bookmarks
as we're reading them
"""
def _merge(self, output): def _merge(self, output):
writer = PdfFileWriter() writer = PdfFileWriter()
for (inputFile,startPage) in self.files: for (inputFile,startPage) in self.files:
assert os.path.isfile(inputFile), ERROR_PATH.format(inputFile) assert os.path.isfile(inputFile), ERROR_PATH.format(inputFile)
reader = PdfFileReader(open(inputFile, 'rb')) reader = PdfFileReader(open(inputFile, 'rb'))
# Recursively iterate through the old bookmarks
self._iterate_old_bookmarks(reader, startPage, reader.getOutlines()) self._iterate_old_bookmarks(reader, startPage, reader.getOutlines())
for page in range(1, reader.getNumPages()+1): for page in range(1, reader.getNumPages()+1):
writer.addPage(reader.getPage(page - 1)) writer.addPage(reader.getPage(page - 1))
@@ -167,13 +163,15 @@ class Stitcher:
writer.write(output) writer.write(output)
output.close() output.close()
"""
Main entrypoint to generate the final PDF
"""
def generate(self, outputFilename, cleanup = False): def generate(self, outputFilename, cleanup = False):
tempPdf = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) tempPdf = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
tempMetadataFile = tempfile.NamedTemporaryFile(suffix=".txt", delete=False)
self._merge(tempPdf) self._merge(tempPdf)
self._generate_metadata(tempMetadataFile.name) # Only read the additional bookmarks if we're not removing them
if (not self._removeExistingBookmarks())
self._add_existing_bookmarks()
self._update_metadata(tempPdf.name, outputFilename) self._update_metadata(tempPdf.name, outputFilename)
if (cleanup): if (cleanup):
@@ -181,4 +179,5 @@ class Stitcher:
os.remove(tempMetadataFile.name) os.remove(tempMetadataFile.name)
os.remove(tempPdf.name) os.remove(tempPdf.name)
else: else:
# Why print? Because this is not logging, this is output
print("Temporary files saved as ", tempPdf.name, tempMetadataFile.name) print("Temporary files saved as ", tempPdf.name, tempMetadataFile.name)