Merge pull request #10 from Vonter/feature/page_filter

Add PDF page selection/filter
This commit is contained in:
Vonter 2021-06-27 00:12:17 +05:30 committed by GitHub
commit 1324c2e4aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 54 additions and 14 deletions

View File

@ -88,18 +88,27 @@ Configuration options can be specified with Meta data at the top of the file.
| | for more details. |
+---------------------+--------------------------------------------------------------------------+
Additionally, PDF links specified in markdown can have attributes to alter the PDFs before merging::
Additionally, PDF links specified in markdown can have attributes to alter the PDFs before merging. The below attribute will rotate the second PDF file by 90 degrees clockwise before merging::
[Part 1](1.pdf)
[Part 2](2.pdf){: rotate="90"}
The above will rotate the second PDF file by 90 degrees clockwise before merging. List of attributes:
The below attribute will merge only pages 2 to 5 (inclusive) of the second PDF file::
+---------------------+---------------------------------------------+
| Attribute | Notes |
+=====================+=============================================+
| rotate | Rotate the PDF. Valid values are 90,180,270 |
+---------------------+---------------------------------------------+
[Part 1](1.pdf)
[Part 2](2.pdf){: start=2 end=5}
The list of available attributes is:
List of attributes:
+---------------------+-----------------------------------------------+
| Attribute | Notes |
+=====================+===============================================+
| rotate | Rotate the PDF. Valid values are 90, 180, 270 |
| start | Start page number for PDF page selection |
| end | End page number for PDF page selection |
+---------------------+-----------------------------------------------+
Documentation
=============

View File

@ -24,6 +24,10 @@ class Stitcher:
DEFAULT_FIT = '/FitV'
# Do not rotate by default
DEFAULT_ROTATE = 0
# Start at page 1 by default
DEFAULT_START = 1
# End at the final page by default
DEFAULT_END = None
# TODO: This is a hack
os.chdir(self.dir)
@ -34,6 +38,8 @@ class Stitcher:
self.attributes = md.Meta
self.defaultFit = self._getAttribute('fit', DEFAULT_FIT)
self.defaultRotate = self._getAttribute('rotate', DEFAULT_ROTATE)
self.defaultStart = self._getAttribute('start', DEFAULT_START)
self.defaultEnd = self._getAttribute('end', DEFAULT_END)
document = html5lib.parseFragment(html, namespaceHTMLElements=False)
for e in document.iter():
@ -92,11 +98,14 @@ class Stitcher:
self.currentLevel = 3
elif(tag =='a'):
file = element.attrib.get('href')
rotate = element.attrib.get('rotate', self.defaultRotate)
fit = element.attrib.get('fit', self.defaultFit)
rotate = int(element.attrib.get('rotate', self.defaultRotate))
start = int(element.attrib.get('start', self.defaultStart))
end = int(element.attrib.get('end', self._get_pdf_number_of_pages(file) if self.defaultEnd is None else self.defaultEnd))
filters = (rotate, start, end)
b = Bookmark(self.currentPage, element.text, self.currentLevel+1, fit)
self.files.append((file, self.currentPage, rotate))
self.currentPage += self._get_pdf_number_of_pages(file)
self.files.append((file, self.currentPage, filters))
self.currentPage += (end - start) + 1
if b:
self.bookmarks.append(b)
@ -133,7 +142,7 @@ class Stitcher:
self.bookmarks = bookmarks
"""
Gets the last bookmkark level at a given page number
Gets the last bookmark level at a given page number
on the combined PDF
"""
def _get_level_from_page_number(self, page):
@ -190,13 +199,14 @@ class Stitcher:
"""
def _merge(self, output):
writer = PdfFileWriter()
for (inputFile,startPage,rotate) in self.files:
for (inputFile,startPage,filters) in self.files:
assert os.path.isfile(inputFile), ERROR_PATH.format(inputFile)
reader = PdfFileReader(open(inputFile, 'rb'))
# Recursively iterate through the old bookmarks
self._iterate_old_bookmarks(reader, startPage, reader.getOutlines())
for page in range(1, reader.getNumPages()+1):
writer.addPage(reader.getPage(page - 1).rotateClockwise(int(rotate)))
rotate, start, end = filters
for page in range(start, end + 1):
writer.addPage(reader.getPage(page - 1).rotateClockwise(rotate))
writer.write(output)
output.close()

21
tests/book-page-select.md Normal file
View File

@ -0,0 +1,21 @@
existing_bookmarks: remove
author: Wiki, the Cat
subject: A book about adventures of Wiki, the cat.
keywords: wiki,potato,jelly
# Super Potato Book
# Volume 1
[Part 1](1.pdf){: start=1 end=2}
# Volume 2
[Part 2](2.pdf){: start=2}
# Volume 3
[Part 3](1.pdf){: end=2}
# Volume 4
[Part 4](2.pdf){: start=1 end=3 rotate="90"}