Remove old metadata text file code
This commit is contained in:
parent
c206324026
commit
5c0c16a2ee
|
@ -3,7 +3,6 @@ import markdown
|
||||||
from .bookmark import Bookmark
|
from .bookmark import Bookmark
|
||||||
import html5lib
|
import html5lib
|
||||||
from PyPDF2 import PdfFileWriter, PdfFileReader
|
from PyPDF2 import PdfFileWriter, PdfFileReader
|
||||||
import subprocess
|
|
||||||
import tempfile
|
import tempfile
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
@ -63,13 +62,6 @@ class Stitcher:
|
||||||
if b:
|
if b:
|
||||||
self.bookmarks.append(b)
|
self.bookmarks.append(b)
|
||||||
|
|
||||||
def _add_bookmark(self, targetFileHandle, page, title, level):
|
|
||||||
targetFileHandle.write("BookmarkBegin\n")
|
|
||||||
targetFileHandle.write("BookmarkTitle: " + title + "\n")
|
|
||||||
targetFileHandle.write("BookmarkLevel: " + str(level) + "\n")
|
|
||||||
targetFileHandle.write("BookmarkPageNumber: " + str(page) + "\n")
|
|
||||||
targetFileHandle.write("BookmarkZoom: FitHeight\n")
|
|
||||||
|
|
||||||
def _existingBookmarkConfig(self):
|
def _existingBookmarkConfig(self):
|
||||||
return self._getAttribute('existing_bookmarks')
|
return self._getAttribute('existing_bookmarks')
|
||||||
|
|
||||||
|
@ -79,39 +71,32 @@ class Stitcher:
|
||||||
def _flattenBookmarks(self):
|
def _flattenBookmarks(self):
|
||||||
return (self._existingBookmarkConfig() == 'flatten')
|
return (self._existingBookmarkConfig() == 'flatten')
|
||||||
|
|
||||||
def _generate_metadata(self, filename):
|
"""
|
||||||
with open(filename, 'w') as target:
|
Adds the existing bookmarks into the
|
||||||
if (self.title):
|
self.bookmarks list
|
||||||
target.write("InfoBegin\n")
|
"""
|
||||||
target.write("InfoKey: Title\n")
|
def _add_existing_bookmarks(self):
|
||||||
target.write("InfoValue: " + self.title + "\n")
|
self.bookmarks.sort()
|
||||||
|
|
||||||
self.bookmarks.sort()
|
bookmarks = self.bookmarks.copy()
|
||||||
|
|
||||||
bookmarks = self.bookmarks.copy()
|
if (self._removeExistingBookmarks() != True):
|
||||||
|
for b in self.oldBookmarks:
|
||||||
|
outer_level = self._get_level_from_page_number(b.page+1)
|
||||||
|
if (self._flattenBookmarks()):
|
||||||
|
increment = 2
|
||||||
|
else:
|
||||||
|
increment = b.level
|
||||||
|
level = outer_level + increment - 1
|
||||||
|
bookmarks.append(Bookmark(b.page+1, b.title, level))
|
||||||
|
|
||||||
if (self._removeExistingBookmarks() != True):
|
bookmarks.sort()
|
||||||
for b in self.oldBookmarks:
|
self.bookmarks = bookmarks
|
||||||
# _logger.info("Checking for %s(%s)", b.title, b.page+1)
|
|
||||||
outer_level = self._get_level_from_page_number(b.page+1)
|
|
||||||
# _logger.info("Got Level: %s", outer_level)
|
|
||||||
if (self._flattenBookmarks()):
|
|
||||||
increment = 2
|
|
||||||
else:
|
|
||||||
increment = b.level
|
|
||||||
level = outer_level + increment - 1
|
|
||||||
bookmarks.append(Bookmark(b.page+1, b.title, level))
|
|
||||||
|
|
||||||
bookmarks.sort()
|
|
||||||
self.bookmarks = bookmarks
|
|
||||||
# self._print_bookmarks()
|
|
||||||
for b in bookmarks:
|
|
||||||
self._add_bookmark(target, b.page, b.title, b.level)
|
|
||||||
|
|
||||||
def _print_bookmarks(self):
|
|
||||||
for b in self.bookmarks:
|
|
||||||
print((" " *( b.level-1)) + b.title + "("+str(b.page)+")")
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Gets the last bookmkark level at a given page number
|
||||||
|
on the combined PDF
|
||||||
|
"""
|
||||||
def _get_level_from_page_number(self, page):
|
def _get_level_from_page_number(self, page):
|
||||||
previousBookmarkLevel = self.bookmarks[0].level
|
previousBookmarkLevel = self.bookmarks[0].level
|
||||||
for b in self.bookmarks:
|
for b in self.bookmarks:
|
||||||
|
@ -122,6 +107,10 @@ class Stitcher:
|
||||||
previousBookmarkLevel = b.level
|
previousBookmarkLevel = b.level
|
||||||
return previousBookmarkLevel
|
return previousBookmarkLevel
|
||||||
|
|
||||||
|
"""
|
||||||
|
Recursive method to read the old bookmarks (which are nested)
|
||||||
|
and push them to self.oldBookmarks
|
||||||
|
"""
|
||||||
def _iterate_old_bookmarks(self, pdf, startPage, bookmarks, level = 1):
|
def _iterate_old_bookmarks(self, pdf, startPage, bookmarks, level = 1):
|
||||||
if (isinstance(bookmarks, list)):
|
if (isinstance(bookmarks, list)):
|
||||||
for inner_bookmark in bookmarks:
|
for inner_bookmark in bookmarks:
|
||||||
|
@ -135,6 +124,7 @@ class Stitcher:
|
||||||
"""
|
"""
|
||||||
Insert the bookmarks into the PDF file
|
Insert the bookmarks into the PDF file
|
||||||
Ref: https://stackoverflow.com/a/18867646
|
Ref: https://stackoverflow.com/a/18867646
|
||||||
|
# TODO: Interleave this into the merge method somehow
|
||||||
"""
|
"""
|
||||||
def _update_metadata(self, old_filename, outputFilename):
|
def _update_metadata(self, old_filename, outputFilename):
|
||||||
stack = []
|
stack = []
|
||||||
|
@ -155,11 +145,17 @@ class Stitcher:
|
||||||
stack.append((b, pdfOutput.addBookmark(b.title, b.page - 1)))
|
stack.append((b, pdfOutput.addBookmark(b.title, b.page - 1)))
|
||||||
pdfOutput.write(open(outputFilename, 'wb'))
|
pdfOutput.write(open(outputFilename, 'wb'))
|
||||||
|
|
||||||
|
"""
|
||||||
|
Merge the PDF files together in order
|
||||||
|
and iterate through the old bookmarks
|
||||||
|
as we're reading them
|
||||||
|
"""
|
||||||
def _merge(self, output):
|
def _merge(self, output):
|
||||||
writer = PdfFileWriter()
|
writer = PdfFileWriter()
|
||||||
for (inputFile,startPage) in self.files:
|
for (inputFile,startPage) in self.files:
|
||||||
assert os.path.isfile(inputFile), ERROR_PATH.format(inputFile)
|
assert os.path.isfile(inputFile), ERROR_PATH.format(inputFile)
|
||||||
reader = PdfFileReader(open(inputFile, 'rb'))
|
reader = PdfFileReader(open(inputFile, 'rb'))
|
||||||
|
# Recursively iterate through the old bookmarks
|
||||||
self._iterate_old_bookmarks(reader, startPage, reader.getOutlines())
|
self._iterate_old_bookmarks(reader, startPage, reader.getOutlines())
|
||||||
for page in range(1, reader.getNumPages()+1):
|
for page in range(1, reader.getNumPages()+1):
|
||||||
writer.addPage(reader.getPage(page - 1))
|
writer.addPage(reader.getPage(page - 1))
|
||||||
|
@ -167,13 +163,15 @@ class Stitcher:
|
||||||
writer.write(output)
|
writer.write(output)
|
||||||
output.close()
|
output.close()
|
||||||
|
|
||||||
|
"""
|
||||||
|
Main entrypoint to generate the final PDF
|
||||||
|
"""
|
||||||
def generate(self, outputFilename, cleanup = False):
|
def generate(self, outputFilename, cleanup = False):
|
||||||
|
|
||||||
tempPdf = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
|
tempPdf = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
|
||||||
tempMetadataFile = tempfile.NamedTemporaryFile(suffix=".txt", delete=False)
|
|
||||||
|
|
||||||
self._merge(tempPdf)
|
self._merge(tempPdf)
|
||||||
self._generate_metadata(tempMetadataFile.name)
|
# Only read the additional bookmarks if we're not removing them
|
||||||
|
if (not self._removeExistingBookmarks())
|
||||||
|
self._add_existing_bookmarks()
|
||||||
self._update_metadata(tempPdf.name, outputFilename)
|
self._update_metadata(tempPdf.name, outputFilename)
|
||||||
|
|
||||||
if (cleanup):
|
if (cleanup):
|
||||||
|
@ -181,4 +179,5 @@ class Stitcher:
|
||||||
os.remove(tempMetadataFile.name)
|
os.remove(tempMetadataFile.name)
|
||||||
os.remove(tempPdf.name)
|
os.remove(tempPdf.name)
|
||||||
else:
|
else:
|
||||||
|
# Why print? Because this is not logging, this is output
|
||||||
print("Temporary files saved as ", tempPdf.name, tempMetadataFile.name)
|
print("Temporary files saved as ", tempPdf.name, tempMetadataFile.name)
|
||||||
|
|
Loading…
Reference in New Issue