# pystitcher/src/pystitcher/stitcher.py

import os
import logging
import shutil
import tempfile
import urllib.request
import validators

import html5lib
import markdown

from PyPDF3 import PdfFileWriter, PdfFileReader
from PyPDF3.generic import FloatObject
from pystitcher import __version__
from .bookmark import Bookmark

_logger = logging.getLogger(__name__)

""" Main Stitcher class """
class Stitcher:
    """Stitch several PDF files into one, driven by a markdown "spine".

    The constructor parses the spine: h1-h3 headings become bookmarks and
    links become input files (with optional fit/rotate/start/end attributes).
    generate() then writes the combined, bookmarked PDF.
    """

    # Default bookmark fit mode
    DEFAULT_FIT = '/FitV'
    # Do not rotate by default
    DEFAULT_ROTATE = 0
    # Start at page 1 by default
    DEFAULT_START = 1
    # End at the final page by default
    DEFAULT_END = None
    # Bookmarks already present in the input files are dropped by default
    EXISTING_BOOKMARKS_DEFAULT = 'remove'

    # Shared message for the "input file is missing" checks
    _ERROR_PATH = "File {} doesn't exist or isn't readable"
    # Heading tag -> bookmark level
    _HEADING_LEVELS = {'h1': 1, 'h2': 2, 'h3': 3}

    def __init__(self, inputBuffer):
        """
        Parse the markdown spine read from inputBuffer and populate
        self.files and self.bookmarks.

        inputBuffer must expose .name (used to locate the spine's directory)
        and .read(). Note that this changes the process working directory.
        """
        self.files = []
        self.currentPage = 1
        self.title = None
        self.bookmarks = []
        self.currentLevel = 0
        self.oldBookmarks = []
        self.dir = os.path.dirname(os.path.abspath(inputBuffer.name))

        # TODO: This is a hack. Link targets and the URL download cache
        # are resolved against the current working directory.
        os.chdir(self.dir)

        text = inputBuffer.read()
        md = markdown.Markdown(extensions=['attr_list', 'meta'])
        html = md.convert(text)
        self.attributes = md.Meta
        self.defaultFit = self._getAttribute('fit', self.DEFAULT_FIT)
        self.defaultRotate = self._getAttribute('rotate', self.DEFAULT_ROTATE)
        self.defaultStart = self._getAttribute('start', self.DEFAULT_START)
        self.defaultEnd = self._getAttribute('end', self.DEFAULT_END)

        document = html5lib.parseFragment(html, namespaceHTMLElements=False)
        for e in document.iter():
            self.iter(e)

    def _cacheURL(self, url):
        """
        Check if file has been cached locally and if
        not cached, download from provided URL. Return
        download filename
        """
        filename = os.path.basename(url)
        if os.path.exists(filename):
            _logger.info("Locally cached PDF found at %s", filename)
            return filename

        _logger.info("Downloading PDF from remote URL %s", url)
        # Download next to the final name and rename on success, so that an
        # interrupted download is never mistaken for a cached file later.
        partial = filename + ".part"
        try:
            with urllib.request.urlopen(url) as response, open(partial, 'wb') as downloadedFile:
                shutil.copyfileobj(response, downloadedFile)
            os.replace(partial, filename)
        finally:
            if os.path.exists(partial):
                os.remove(partial)
        return filename

    def _get_pdf_number_of_pages(self, filename):
        """
        Get the number of pages in a PDF file

        Raises AssertionError if the file is missing or unreadable.
        """
        assert os.path.isfile(filename) and os.access(filename, os.R_OK), \
            self._ERROR_PATH.format(filename)
        with open(filename, "rb") as handle:
            return PdfFileReader(handle).numPages

    def _getAttribute(self, key, default=None):
        """
        Return the first value of a spine metadata attribute,
        or `default` (None unless given) when the key is absent
        """
        return self.attributes.get(key, [default])[0]

    def _getMetadata(self):
        """
        Build the PDF document-information dictionary. The title falls
        back to the first h1 heading when no `title` attribute is set.
        """
        generator = "pystitcher/%s" % __version__
        meta = {'/Producer': generator, '/Creator': generator}

        author = self._getAttribute('author')
        if author:
            meta["/Author"] = author
        title = self._getAttribute('title') or self.title
        if title:
            meta["/Title"] = title
        subject = self._getAttribute('subject')
        if subject:
            meta["/Subject"] = subject
        keywords = self._getAttribute('keywords')
        if keywords:
            meta["/Keywords"] = keywords

        return meta

    def iter(self, element):
        """
        Process one element of the spine HTML and update
        self.bookmarks + self.files accordingly. Headings open a
        bookmark level; links add an input file one level below it.
        """
        tag = element.tag
        headingLevel = self._HEADING_LEVELS.get(tag)
        if headingLevel is not None:
            # The first h1 doubles as the document title
            if headingLevel == 1 and self.title is None:
                self.title = element.text
            fit = element.attrib.get('fit', self.defaultFit)
            self.bookmarks.append(Bookmark(self.currentPage, element.text, headingLevel, fit))
            self.currentLevel = headingLevel
        elif tag == 'a':
            path = element.attrib.get('href')
            if not path:
                _logger.warning("Skipping link without a target: %s", element.text)
                return
            if validators.url(path):
                path = self._cacheURL(path)
            fit = element.attrib.get('fit', self.defaultFit)
            rotate = int(element.attrib.get('rotate', self.defaultRotate))
            start = int(element.attrib.get('start', self.defaultStart))
            # Only open the PDF to count pages when no end page was given,
            # neither on the link nor as a document-wide default.
            end = element.attrib.get('end', self.defaultEnd)
            if end is None:
                end = self._get_pdf_number_of_pages(path)
            end = int(end)
            filters = (rotate, start, end)
            self.bookmarks.append(Bookmark(self.currentPage, element.text, self.currentLevel + 1, fit))
            self.files.append((path, self.currentPage, filters))
            self.currentPage += (end - start) + 1

    def _existingBookmarkConfig(self):
        """Return the `existing_bookmarks` setting ('remove' when unset)"""
        return self._getAttribute('existing_bookmarks', self.EXISTING_BOOKMARKS_DEFAULT)

    def _removeExistingBookmarks(self):
        """True when bookmarks of the input files must be dropped"""
        return self._existingBookmarkConfig() == 'remove'

    def _flattenBookmarks(self):
        """True when bookmarks of the input files must be kept at one level"""
        return self._existingBookmarkConfig() == 'flatten'

    def _add_existing_bookmarks(self):
        """
        Adds the existing bookmarks into the
        self.bookmarks list
        """
        self.bookmarks.sort()
        merged = self.bookmarks.copy()

        if not self._removeExistingBookmarks():
            flatten = self._flattenBookmarks()
            for old in self.oldBookmarks:
                # Old bookmarks are stored 0-indexed, self.bookmarks 1-indexed
                page = old.page + 1
                outer_level = self._get_level_from_page_number(page)
                increment = 2 if flatten else old.level
                merged.append(Bookmark(page, old.title, outer_level + increment - 1, old.fit))

        merged.sort()
        self.bookmarks = merged

    def _get_level_from_page_number(self, page):
        """
        Gets the last bookmark level at a given page number
        on the combined PDF. Returns 0 when there are no bookmarks.
        """
        if not self.bookmarks:
            return 0
        level = self.bookmarks[0].level
        for b in self.bookmarks:
            if b.page > page:
                break
            level = b.level
        return level

    @staticmethod
    def _to_global_page(startPage, localPageNumber, start=1, end=None):
        """
        Map a 0-indexed page of an input file to the 0-indexed page of the
        combined PDF, given that the file's pages start..end (1-indexed,
        inclusive) are placed from 1-indexed page startPage onwards.
        Returns None when the page is unresolved (negative) or outside
        the selected range.
        """
        if localPageNumber < 0:
            return None
        sourcePage = localPageNumber + 1
        if sourcePage < start or (end is not None and sourcePage > end):
            return None
        return startPage + (sourcePage - start) - 1

    def _iterate_old_bookmarks(self, pdf, startPage, bookmarks, level=1, start=1, end=None):
        """
        Recursive method to read the old bookmarks (which are nested)
        and push them to self.oldBookmarks

        start/end is the 1-indexed page range of `pdf` that is copied to
        the output; bookmarks pointing outside of it are skipped.
        """
        if isinstance(bookmarks, list):
            for inner_bookmark in bookmarks:
                self._iterate_old_bookmarks(pdf, startPage, inner_bookmark, level + 1, start, end)
            return

        localPageNumber = pdf.getDestinationPageNumber(bookmarks)
        globalPageNumber = self._to_global_page(startPage, localPageNumber, start, end)
        if globalPageNumber is None:
            _logger.debug("Skipping existing bookmark outside selected pages: %s", bookmarks.title)
            return
        self.oldBookmarks.append(Bookmark(globalPageNumber, bookmarks.title, level, self.defaultFit))

    def _insert_bookmarks(self, old_filename, outputFilename):
        """
        Insert the bookmarks into the PDF file
        Ref: https://stackoverflow.com/a/18867646
        # TODO: Interleave this into the merge method somehow
        """
        # Stack of (bookmark, reference) pairs forming the current ancestry
        stack = []
        # The source stays open until the output has been written
        with open(old_filename, 'rb') as source:
            pdfInput = PdfFileReader(source)
            pdfOutput = PdfFileWriter()
            pdfOutput.cloneDocumentFromReader(pdfInput)
            for b in self.bookmarks:
                # Trim the stack till the top is useful (stack.level < b.level)
                while stack and stack[-1][0].level >= b.level:
                    stack.pop()
                # If stack has something, use it as the parent
                parentRef = stack[-1][1] if stack else None
                bookmarkArgs = [b.title, b.page - 1, parentRef, None, False, False, b.fit] + b.cords
                stack.append((b, pdfOutput.addBookmark(*bookmarkArgs)))
            pdfOutput.addMetadata(self._getMetadata())
            with open(outputFilename, 'wb') as target:
                pdfOutput.write(target)

    def _merge(self, output):
        """
        Merge the PDF files together in order
        and iterate through the old bookmarks
        as we're reading them

        `output` is a writable binary file object; it is closed once the
        merge has been attempted. Raises AssertionError if an input file
        is missing.
        """
        # Fail fast, before anything is read or written
        for inputFile, _startPage, _filters in self.files:
            assert os.path.isfile(inputFile), self._ERROR_PATH.format(inputFile)

        writer = PdfFileWriter()
        # Inputs stay open until the writer has produced the output
        handles = []
        try:
            for (inputFile, startPage, filters) in self.files:
                handle = open(inputFile, 'rb')
                handles.append(handle)
                reader = PdfFileReader(handle)
                rotate, start, end = filters
                # Recursively iterate through the old bookmarks
                self._iterate_old_bookmarks(reader, startPage, reader.getOutlines(),
                                            start=start, end=end)
                for page in range(start, end + 1):
                    writer.addPage(reader.getPage(page - 1).rotateClockwise(rotate))

            writer.write(output)
        finally:
            for handle in handles:
                handle.close()
            output.close()

    def generate(self, outputFilename, cleanup = False):
        """
        Main entrypoint to generate the final PDF

        The merged pages go to a temporary PDF first; it is deleted
        when `cleanup` is true (even on failure) and reported otherwise.
        """
        tempPdf = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
        try:
            self._merge(tempPdf)
            # Only read the additional bookmarks if we're not removing them
            if not self._removeExistingBookmarks():
                self._add_existing_bookmarks()
            self._insert_bookmarks(tempPdf.name, outputFilename)
        finally:
            tempPdf.close()
            if cleanup:
                _logger.info("Deleting temporary files")
                os.remove(tempPdf.name)

        if not cleanup:
            # Why print? Because this is not logging, this is output
            print("Temporary PDF file saved as ", tempPdf.name)
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`import os`
Add external URL fetching of PDFs Also changed import order according to PEP8 2021-06-27 12:03:49 +00:00			`import logging`
			`import shutil`
			`import tempfile`
			`import urllib.request`
			`import validators`

Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`import html5lib`
Add external URL fetching of PDFs Also changed import order according to PEP8 2021-06-27 12:03:49 +00:00			`import markdown`

Switch to PyPDF3 2021-05-28 16:08:53 +00:00			`from PyPDF3 import PdfFileWriter, PdfFileReader`
Fully support fit levels 2021-05-28 17:11:57 +00:00			`from PyPDF3.generic import FloatObject`
Adds back metadata support for PDF 2021-05-28 15:59:02 +00:00			`from pystitcher import __version__`
Add external URL fetching of PDFs Also changed import order according to PEP8 2021-06-27 12:03:49 +00:00			`from .bookmark import Bookmark`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00
Logging and temp pdf file 2021-05-26 14:43:59 +00:00			`_logger = logging.getLogger(__name__)`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00
			`""" Main Stitcher class """`
			`class Stitcher:`
			`def __init__(self, inputBuffer):`
			`self.files = []`
			`self.currentPage = 1`
			`self.title = None`
			`self.bookmarks = []`
Fixes the case of heading less markdown 2021-05-28 20:48:43 +00:00			`self.currentLevel = 0`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`self.oldBookmarks = []`
			`self.dir = os.path.dirname(os.path.abspath(inputBuffer.name))`
Fully support fit levels 2021-05-28 17:11:57 +00:00			`# Fit complete page width by default`
			`DEFAULT_FIT = '/FitV'`
Added PDF rotation filter 2021-06-25 06:41:24 +00:00			`# Do not rotate by default`
			`DEFAULT_ROTATE = 0`
Add PDF page selection/filter 2021-06-26 17:26:38 +00:00			`# Start at page 1 by default`
			`DEFAULT_START = 1`
Make defaultEnd correspond to absolute page number 2021-06-26 18:33:57 +00:00			`# End at the final page by default`
			`DEFAULT_END = None`
Fully support fit levels 2021-05-28 17:11:57 +00:00
Fix existing bookmark numbering 2021-05-26 16:25:51 +00:00			`# TODO: This is a hack`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`os.chdir(self.dir)`

			`text = inputBuffer.read()`
Add support for removing inner bookmarks 2021-05-26 16:14:26 +00:00			`md = markdown.Markdown(extensions=['attr_list', 'meta'])`
			`html = md.convert(text)`
			`self.attributes = md.Meta`
Fully support fit levels 2021-05-28 17:11:57 +00:00			`self.defaultFit = self._getAttribute('fit', DEFAULT_FIT)`
Added PDF rotation filter 2021-06-25 06:41:24 +00:00			`self.defaultRotate = self._getAttribute('rotate', DEFAULT_ROTATE)`
Add PDF page selection/filter 2021-06-26 17:26:38 +00:00			`self.defaultStart = self._getAttribute('start', DEFAULT_START)`
			`self.defaultEnd = self._getAttribute('end', DEFAULT_END)`
Add support for removing inner bookmarks 2021-05-26 16:14:26 +00:00
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`document = html5lib.parseFragment(html, namespaceHTMLElements=False)`
			`for e in document.iter():`
			`self.iter(e)`

Add external URL fetching of PDFs Also changed import order according to PEP8 2021-06-27 12:03:49 +00:00			`"""`
			`Check if file has been cached locally and if`
			`not cached, download from provided URL. Return`
			`download filename`
			`"""`
			`def _cacheURL(self, url):`
			`if not os.path.exists(os.path.basename(url)):`
			`_logger.info("Downloading PDF from remote URL %s", url)`
			`with urllib.request.urlopen(url) as response, open(os.path.basename(url), 'wb') as downloadedFile:`
			`shutil.copyfileobj(response, downloadedFile)`
			`else:`
Fix logged filename for locally cached file 2021-06-27 12:13:09 +00:00			`_logger.info("Locally cached PDF found at %s", os.path.basename(url))`
Add external URL fetching of PDFs Also changed import order according to PEP8 2021-06-27 12:03:49 +00:00			`return os.path.basename(url)`

Adds back metadata support for PDF 2021-05-28 15:59:02 +00:00			`"""`
			`Get the number of pages in a PDF file`
			`"""`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`def _get_pdf_number_of_pages(self, filename):`
			`assert os.path.isfile(filename) and os.access(filename, os.R_OK), \`
			`"File {} doesn't exist or isn't readable".format(filename)`
			`pdf_reader = PdfFileReader(open(filename, "rb"))`
			`return pdf_reader.numPages`

Adds back metadata support for PDF 2021-05-28 15:59:02 +00:00			`"""`
			`Return an attribute with a default value of None`
			`"""`
Fully support fit levels 2021-05-28 17:11:57 +00:00			`def _getAttribute(self, key, default=None):`
			`return self.attributes.get(key, [default])[0]`
Fix existing bookmark numbering 2021-05-26 16:25:51 +00:00
Adds back metadata support for PDF 2021-05-28 15:59:02 +00:00			`def _getMetadata(self):`
			`meta = {'/Producer': "pystitcher/%s" % __version__, '/Creator': "pystitcher/%s" % __version__}`
			`if (self._getAttribute('author')):`
			`meta["/Author"] = self._getAttribute('author')`
			`if (self._getAttribute('title')):`
			`meta["/Title"] = self._getAttribute('title')`
			`elif self.title:`
			`meta["/Title"] = self.title`
			`if (self._getAttribute('subject')):`
			`meta["/Subject"] = self._getAttribute('subject')`
			`if (self._getAttribute('keywords')):`
			`meta["/Keywords"] = self._getAttribute('keywords')`

			`return meta`

Fully support fit levels 2021-05-28 17:11:57 +00:00			`"""`
			`Iterate through the elements in the spine HTML`
			`and generate self.bookmarks + self.files`
			`"""`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`def iter(self, element):`
			`tag = element.tag`
			`b = None`
			`if(tag=='h1'):`
			`if (self.title == None):`
			`self.title = element.text`
Fully support fit levels 2021-05-28 17:11:57 +00:00			`fit = element.attrib.get('fit', self.defaultFit)`
			`b = Bookmark(self.currentPage, element.text, 1, fit)`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`self.currentLevel = 1`
			`elif(tag=='h2'):`
Fully support fit levels 2021-05-28 17:11:57 +00:00			`fit = element.attrib.get('fit', self.defaultFit)`
			`b = Bookmark(self.currentPage, element.text, 2, fit)`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`self.currentLevel = 2`
			`elif(tag =='h3'):`
Fully support fit levels 2021-05-28 17:11:57 +00:00			`fit = element.attrib.get('fit', self.defaultFit)`
			`b = Bookmark(self.currentPage, element.text, 3, fit)`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`self.currentLevel = 3`
			`elif(tag =='a'):`
			`file = element.attrib.get('href')`
Add external URL fetching of PDFs Also changed import order according to PEP8 2021-06-27 12:03:49 +00:00			`if(validators.url(file)):`
			`file = self._cacheURL(file)`
Fully support fit levels 2021-05-28 17:11:57 +00:00			`fit = element.attrib.get('fit', self.defaultFit)`
Add PDF page selection/filter 2021-06-26 17:26:38 +00:00			`rotate = int(element.attrib.get('rotate', self.defaultRotate))`
			`start = int(element.attrib.get('start', self.defaultStart))`
Add external URL fetching of PDFs Also changed import order according to PEP8 2021-06-27 12:03:49 +00:00			`end = int(element.attrib.get('end', self._get_pdf_number_of_pages(file)`
			`if self.defaultEnd is None else self.defaultEnd))`
Add PDF page selection/filter 2021-06-26 17:26:38 +00:00			`filters = (rotate, start, end)`
Fully support fit levels 2021-05-28 17:11:57 +00:00			`b = Bookmark(self.currentPage, element.text, self.currentLevel+1, fit)`
Add PDF page selection/filter 2021-06-26 17:26:38 +00:00			`self.files.append((file, self.currentPage, filters))`
			`self.currentPage += (end - start) + 1`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`if b:`
			`self.bookmarks.append(b)`

Add support for removing inner bookmarks 2021-05-26 16:14:26 +00:00			`def _existingBookmarkConfig(self):`
Sets default value for existing_bookmarks 2021-05-28 17:34:50 +00:00			`EXISTING_BOOKMARKS_DEFAULT = 'remove'`
			`return self._getAttribute('existing_bookmarks', EXISTING_BOOKMARKS_DEFAULT)`
Add support for removing inner bookmarks 2021-05-26 16:14:26 +00:00
Fix existing bookmark numbering 2021-05-26 16:25:51 +00:00			`def _removeExistingBookmarks(self):`
			`return (self._existingBookmarkConfig() == 'remove')`
Add support for removing inner bookmarks 2021-05-26 16:14:26 +00:00
			`def _flattenBookmarks(self):`
Fix existing bookmark numbering 2021-05-26 16:25:51 +00:00			`return (self._existingBookmarkConfig() == 'flatten')`
Add support for removing inner bookmarks 2021-05-26 16:14:26 +00:00
Remove old metadata text file code 2021-05-28 15:45:51 +00:00			`"""`
Added PDF rotation filter 2021-06-25 06:41:24 +00:00			`Adds the existing bookmarks into the`
Remove old metadata text file code 2021-05-28 15:45:51 +00:00			`self.bookmarks list`
			`"""`
			`def _add_existing_bookmarks(self):`
			`self.bookmarks.sort()`

			`bookmarks = self.bookmarks.copy()`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00
Remove old metadata text file code 2021-05-28 15:45:51 +00:00			`if (self._removeExistingBookmarks() != True):`
			`for b in self.oldBookmarks:`
			`outer_level = self._get_level_from_page_number(b.page+1)`
			`if (self._flattenBookmarks()):`
			`increment = 2`
			`else:`
			`increment = b.level`
			`level = outer_level + increment - 1`
Fully support fit levels 2021-05-28 17:11:57 +00:00			`bookmarks.append(Bookmark(b.page+1, b.title, level, b.fit))`
Remove old metadata text file code 2021-05-28 15:45:51 +00:00
			`bookmarks.sort()`
			`self.bookmarks = bookmarks`

			`"""`
Add PDF page selection/filter 2021-06-26 17:26:38 +00:00			`Gets the last bookmark level at a given page number`
Remove old metadata text file code 2021-05-28 15:45:51 +00:00			`on the combined PDF`
			`"""`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`def _get_level_from_page_number(self, page):`
Fixes clean bookmark insertion. 2021-05-28 13:22:57 +00:00			`previousBookmarkLevel = self.bookmarks[0].level`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`for b in self.bookmarks:`
Fixes clean bookmark insertion. 2021-05-28 13:22:57 +00:00			`# _logger.info("testing: %s (P%s) [L%s]", b.title, b.page, b.level)`
Find last bookmark on that page 2021-05-26 16:47:24 +00:00			`if (b.page > page):`
Fixes clean bookmark insertion. 2021-05-28 13:22:57 +00:00			`# _logger.info("Returning L%s", previousBookmarkLevel)`
			`return previousBookmarkLevel`
			`previousBookmarkLevel = b.level`
			`return previousBookmarkLevel`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00
Remove old metadata text file code 2021-05-28 15:45:51 +00:00			`"""`
			`Recursive method to read the old bookmarks (which are nested)`
			`and push them to self.oldBookmarks`
			`"""`
Import old bookmarks via PyPDF 2021-05-26 15:26:18 +00:00			`def _iterate_old_bookmarks(self, pdf, startPage, bookmarks, level = 1):`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`if (isinstance(bookmarks, list)):`
			`for inner_bookmark in bookmarks:`
Import old bookmarks via PyPDF 2021-05-26 15:26:18 +00:00			`self._iterate_old_bookmarks(pdf, startPage, inner_bookmark, level+1)`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`else:`
Import old bookmarks via PyPDF 2021-05-26 15:26:18 +00:00			`localPageNumber = pdf.getDestinationPageNumber(bookmarks)`
Fix existing bookmark numbering 2021-05-26 16:25:51 +00:00			`globalPageNumber = startPage + localPageNumber - 1`
Fully support fit levels 2021-05-28 17:11:57 +00:00			`b = Bookmark(globalPageNumber, bookmarks.title, level, self.defaultFit)`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`self.oldBookmarks.append(b)`

Drops pdftkbox entirely 2021-05-28 15:05:48 +00:00			`"""`
			`Insert the bookmarks into the PDF file`
			`Ref: https://stackoverflow.com/a/18867646`
Remove old metadata text file code 2021-05-28 15:45:51 +00:00			`# TODO: Interleave this into the merge method somehow`
Drops pdftkbox entirely 2021-05-28 15:05:48 +00:00			`"""`
Fully support fit levels 2021-05-28 17:11:57 +00:00			`def _insert_bookmarks(self, old_filename, outputFilename):`
Drops pdftkbox entirely 2021-05-28 15:05:48 +00:00			`stack = []`
			`pdfInput = PdfFileReader(open(old_filename, 'rb'))`
			`pdfOutput = PdfFileWriter()`
			`pdfOutput.cloneDocumentFromReader(pdfInput)`
			`for b in self.bookmarks:`
			`existingRef = None`
			`# Trim the stack till the top is useful (stack.level < b.level)`
			`while len(stack) > 0 and stack[len(stack)-1][0].level >= b.level:`
			`stack.pop()`
			`# If stack has something, use it`
			`if (len(stack) > 0):`
Fully support fit levels 2021-05-28 17:11:57 +00:00			`existingRef = stack[len(stack) - 1][1]`
			`bookmargArgs = [b.title, b.page-1, existingRef, None, False, False, b.fit] + b.cords`
			`stack.append((b, pdfOutput.addBookmark(*bookmargArgs)))`
Adds back metadata support for PDF 2021-05-28 15:59:02 +00:00			`pdfOutput.addMetadata(self._getMetadata())`
Drops pdftkbox entirely 2021-05-28 15:05:48 +00:00			`pdfOutput.write(open(outputFilename, 'wb'))`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00
Remove old metadata text file code 2021-05-28 15:45:51 +00:00			`"""`
			`Merge the PDF files together in order`
			`and iterate through the old bookmarks`
			`as we're reading them`
			`"""`
Import old bookmarks via PyPDF 2021-05-26 15:26:18 +00:00			`def _merge(self, output):`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`writer = PdfFileWriter()`
Add PDF page selection/filter 2021-06-26 17:26:38 +00:00			`for (inputFile,startPage,filters) in self.files:`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00			`assert os.path.isfile(inputFile), ERROR_PATH.format(inputFile)`
			`reader = PdfFileReader(open(inputFile, 'rb'))`
Remove old metadata text file code 2021-05-28 15:45:51 +00:00			`# Recursively iterate through the old bookmarks`
Import old bookmarks via PyPDF 2021-05-26 15:26:18 +00:00			`self._iterate_old_bookmarks(reader, startPage, reader.getOutlines())`
Add PDF page selection/filter 2021-06-26 17:26:38 +00:00			`rotate, start, end = filters`
			`for page in range(start, end + 1):`
			`writer.addPage(reader.getPage(page - 1).rotateClockwise(rotate))`
Added PDF rotation filter 2021-06-25 06:41:24 +00:00
Logging and temp pdf file 2021-05-26 14:43:59 +00:00			`writer.write(output)`
			`output.close()`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00
Remove old metadata text file code 2021-05-28 15:45:51 +00:00			`"""`
			`Main entrypoint to generate the final PDF`
			`"""`
Logging and temp pdf file 2021-05-26 14:43:59 +00:00			`def generate(self, outputFilename, cleanup = False):`
			`tempPdf = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)`
Import old bookmarks via PyPDF 2021-05-26 15:26:18 +00:00			`self._merge(tempPdf)`
Remove old metadata text file code 2021-05-28 15:45:51 +00:00			`# Only read the additional bookmarks if we're not removing them`
Adds back metadata support for PDF 2021-05-28 15:59:02 +00:00			`if (not self._removeExistingBookmarks()):`
Remove old metadata text file code 2021-05-28 15:45:51 +00:00			`self._add_existing_bookmarks()`
Fully support fit levels 2021-05-28 17:11:57 +00:00			`self._insert_bookmarks(tempPdf.name, outputFilename)`
Functionally running, but only for me 2021-05-26 13:54:36 +00:00
Logging and temp pdf file 2021-05-26 14:43:59 +00:00			`if (cleanup):`
			`_logger.info("Deleting temporary files")`
			`os.remove(tempPdf.name)`
Add startPage to file list 2021-05-26 14:58:15 +00:00			`else:`
Remove old metadata text file code 2021-05-28 15:45:51 +00:00			`# Why print? Because this is not logging, this is output`
Adds back metadata support for PDF 2021-05-28 15:59:02 +00:00			`print("Temporary PDF file saved as ", tempPdf.name)`