Fully support fit levels

This commit is contained in:
Nemo 2021-05-28 22:41:57 +05:30
parent 044d21b8bc
commit cd17b81f16
2 changed files with 36 additions and 17 deletions

View File

@ -1,11 +1,19 @@
""" Bookmark class """ """ Bookmark class """
class Bookmark: class Bookmark:
def __init__(self, page, title, level=1): def __init__(self, page, title, level=1, fit='/FitV'):
self.page = page self.page = page
self.title = title self.title = title
self.level = level self.level = level
# default value for now # default value for now
self.fit = '/FitH' self.fit = fit
if (self.fit == '/Fit' or self.fit == '/FitB'):
self.cords = []
elif (self.fit == '/FitH' or self.fit == '/FitV' or self.fit == '/FitBH' or self.fit == '/FitBV'):
self.cords = [(0)]
else:
self.fit = '/FitB'
self.cords = []
def __lt__(self, other): def __lt__(self, other):
return self.page < other.page return self.page < other.page

View File

@ -3,6 +3,7 @@ import markdown
from .bookmark import Bookmark from .bookmark import Bookmark
import html5lib import html5lib
from PyPDF3 import PdfFileWriter, PdfFileReader from PyPDF3 import PdfFileWriter, PdfFileReader
from PyPDF3.generic import FloatObject
from pystitcher import __version__ from pystitcher import __version__
import tempfile import tempfile
import logging import logging
@ -19,6 +20,9 @@ class Stitcher:
self.currentLevel = None self.currentLevel = None
self.oldBookmarks = [] self.oldBookmarks = []
self.dir = os.path.dirname(os.path.abspath(inputBuffer.name)) self.dir = os.path.dirname(os.path.abspath(inputBuffer.name))
# Fit complete page height by default (/FitV fits the height; /FitH fits the width)
DEFAULT_FIT = '/FitV'
# TODO: This is a hack # TODO: This is a hack
os.chdir(self.dir) os.chdir(self.dir)
@ -26,6 +30,7 @@ class Stitcher:
md = markdown.Markdown(extensions=['attr_list', 'meta']) md = markdown.Markdown(extensions=['attr_list', 'meta'])
html = md.convert(text) html = md.convert(text)
self.attributes = md.Meta self.attributes = md.Meta
self.defaultFit = self._getAttribute('fit', DEFAULT_FIT)
document = html5lib.parseFragment(html, namespaceHTMLElements=False) document = html5lib.parseFragment(html, namespaceHTMLElements=False)
for e in document.iter(): for e in document.iter():
@ -43,8 +48,8 @@ class Stitcher:
""" """
Return an attribute with a default value of None Return an attribute with a default value of None
""" """
def _getAttribute(self, key): def _getAttribute(self, key, default=None):
return self.attributes.get(key, [None])[0] return self.attributes.get(key, [default])[0]
def _getMetadata(self): def _getMetadata(self):
meta = {'/Producer': "pystitcher/%s" % __version__, '/Creator': "pystitcher/%s" % __version__} meta = {'/Producer': "pystitcher/%s" % __version__, '/Creator': "pystitcher/%s" % __version__}
@ -61,23 +66,31 @@ class Stitcher:
return meta return meta
"""
Iterate through the elements in the spine HTML
and generate self.bookmarks + self.files
"""
def iter(self, element): def iter(self, element):
tag = element.tag tag = element.tag
b = None b = None
if(tag=='h1'): if(tag=='h1'):
if (self.title == None): if (self.title == None):
self.title = element.text self.title = element.text
b = Bookmark(self.currentPage, element.text, 1) fit = element.attrib.get('fit', self.defaultFit)
b = Bookmark(self.currentPage, element.text, 1, fit)
self.currentLevel = 1 self.currentLevel = 1
elif(tag=='h2'): elif(tag=='h2'):
b = Bookmark(self.currentPage, element.text, 2) fit = element.attrib.get('fit', self.defaultFit)
b = Bookmark(self.currentPage, element.text, 2, fit)
self.currentLevel = 2 self.currentLevel = 2
elif(tag =='h3'): elif(tag =='h3'):
b = Bookmark(self.currentPage, element.text, 3) fit = element.attrib.get('fit', self.defaultFit)
b = Bookmark(self.currentPage, element.text, 3, fit)
self.currentLevel = 3 self.currentLevel = 3
elif(tag =='a'): elif(tag =='a'):
file = element.attrib.get('href') file = element.attrib.get('href')
b = Bookmark(self.currentPage, element.text, self.currentLevel+1) fit = element.attrib.get('fit', self.defaultFit)
b = Bookmark(self.currentPage, element.text, self.currentLevel+1, fit)
self.files.append((file, self.currentPage)) self.files.append((file, self.currentPage))
self.currentPage += self._get_pdf_number_of_pages(file) self.currentPage += self._get_pdf_number_of_pages(file)
if b: if b:
@ -109,7 +122,7 @@ class Stitcher:
else: else:
increment = b.level increment = b.level
level = outer_level + increment - 1 level = outer_level + increment - 1
bookmarks.append(Bookmark(b.page+1, b.title, level)) bookmarks.append(Bookmark(b.page+1, b.title, level, b.fit))
bookmarks.sort() bookmarks.sort()
self.bookmarks = bookmarks self.bookmarks = bookmarks
@ -139,7 +152,7 @@ class Stitcher:
else: else:
localPageNumber = pdf.getDestinationPageNumber(bookmarks) localPageNumber = pdf.getDestinationPageNumber(bookmarks)
globalPageNumber = startPage + localPageNumber - 1 globalPageNumber = startPage + localPageNumber - 1
b = Bookmark(globalPageNumber, bookmarks.title, level) b = Bookmark(globalPageNumber, bookmarks.title, level, self.defaultFit)
self.oldBookmarks.append(b) self.oldBookmarks.append(b)
""" """
@ -147,7 +160,7 @@ class Stitcher:
Ref: https://stackoverflow.com/a/18867646 Ref: https://stackoverflow.com/a/18867646
# TODO: Interleave this into the merge method somehow # TODO: Interleave this into the merge method somehow
""" """
def _update_metadata(self, old_filename, outputFilename): def _insert_bookmarks(self, old_filename, outputFilename):
stack = [] stack = []
pdfInput = PdfFileReader(open(old_filename, 'rb')) pdfInput = PdfFileReader(open(old_filename, 'rb'))
pdfOutput = PdfFileWriter() pdfOutput = PdfFileWriter()
@ -159,11 +172,9 @@ class Stitcher:
stack.pop() stack.pop()
# If stack has something, use it # If stack has something, use it
if (len(stack) > 0): if (len(stack) > 0):
existingRef = stack[len(stack) - 1] existingRef = stack[len(stack) - 1][1]
stack.append((b, pdfOutput.addBookmark(b.title, b.page - 1, existingRef[1]))) bookmargArgs = [b.title, b.page-1, existingRef, None, False, False, b.fit] + b.cords
# Else, push to top stack.append((b, pdfOutput.addBookmark(*bookmargArgs)))
else:
stack.append((b, pdfOutput.addBookmark(b.title, b.page - 1)))
pdfOutput.addMetadata(self._getMetadata()) pdfOutput.addMetadata(self._getMetadata())
pdfOutput.write(open(outputFilename, 'wb')) pdfOutput.write(open(outputFilename, 'wb'))
@ -194,7 +205,7 @@ class Stitcher:
# Only read the additional bookmarks if we're not removing them # Only read the additional bookmarks if we're not removing them
if (not self._removeExistingBookmarks()): if (not self._removeExistingBookmarks()):
self._add_existing_bookmarks() self._add_existing_bookmarks()
self._update_metadata(tempPdf.name, outputFilename) self._insert_bookmarks(tempPdf.name, outputFilename)
if (cleanup): if (cleanup):
_logger.info("Deleting temporary files") _logger.info("Deleting temporary files")