Merge pull request #10 from Vonter/feature/page_filter

Add PDF page selection/filter
This commit is contained in:
Vonter 2021-06-27 00:12:17 +05:30 committed by GitHub
commit 1324c2e4aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 54 additions and 14 deletions

View File

@ -88,18 +88,27 @@ Configuration options can be specified with Meta data at the top of the file.
| | for more details. | | | for more details. |
+---------------------+--------------------------------------------------------------------------+ +---------------------+--------------------------------------------------------------------------+
Additionally, PDF links specified in markdown can have attributes to alter the PDFs before merging:: Additionally, PDF links specified in markdown can have attributes to alter the PDFs before merging. The below attribute will rotate the second PDF file by 90 degrees clockwise before merging::
[Part 1](1.pdf) [Part 1](1.pdf)
[Part 2](2.pdf){: rotate="90"} [Part 2](2.pdf){: rotate="90"}
The above will rotate the second PDF file by 90 degrees clockwise before merging. List of attributes: The attributes below will merge only pages 2 to 5 (inclusive) from the second PDF file::
+---------------------+---------------------------------------------+ [Part 1](1.pdf)
| Attribute | Notes | [Part 2](2.pdf){: start=2 end=5}
+=====================+=============================================+
| rotate | Rotate the PDF. Valid values are 90,180,270 | The available attributes are:
+---------------------+---------------------------------------------+
List of attributes:
+---------------------+-----------------------------------------------+
| Attribute | Notes |
+=====================+===============================================+
| rotate | Rotate the PDF. Valid values are 90, 180, 270 |
| start | Start page number for PDF page selection |
| end | End page number for PDF page selection |
+---------------------+-----------------------------------------------+
Documentation Documentation
============= =============

View File

@ -24,6 +24,10 @@ class Stitcher:
DEFAULT_FIT = '/FitV' DEFAULT_FIT = '/FitV'
# Do not rotate by default # Do not rotate by default
DEFAULT_ROTATE = 0 DEFAULT_ROTATE = 0
# Start at page 1 by default
DEFAULT_START = 1
# End at the final page by default
DEFAULT_END = None
# TODO: This is a hack # TODO: This is a hack
os.chdir(self.dir) os.chdir(self.dir)
@ -34,6 +38,8 @@ class Stitcher:
self.attributes = md.Meta self.attributes = md.Meta
self.defaultFit = self._getAttribute('fit', DEFAULT_FIT) self.defaultFit = self._getAttribute('fit', DEFAULT_FIT)
self.defaultRotate = self._getAttribute('rotate', DEFAULT_ROTATE) self.defaultRotate = self._getAttribute('rotate', DEFAULT_ROTATE)
self.defaultStart = self._getAttribute('start', DEFAULT_START)
self.defaultEnd = self._getAttribute('end', DEFAULT_END)
document = html5lib.parseFragment(html, namespaceHTMLElements=False) document = html5lib.parseFragment(html, namespaceHTMLElements=False)
for e in document.iter(): for e in document.iter():
@ -92,11 +98,14 @@ class Stitcher:
self.currentLevel = 3 self.currentLevel = 3
elif(tag =='a'): elif(tag =='a'):
file = element.attrib.get('href') file = element.attrib.get('href')
rotate = element.attrib.get('rotate', self.defaultRotate)
fit = element.attrib.get('fit', self.defaultFit) fit = element.attrib.get('fit', self.defaultFit)
rotate = int(element.attrib.get('rotate', self.defaultRotate))
start = int(element.attrib.get('start', self.defaultStart))
end = int(element.attrib.get('end', self._get_pdf_number_of_pages(file) if self.defaultEnd is None else self.defaultEnd))
filters = (rotate, start, end)
b = Bookmark(self.currentPage, element.text, self.currentLevel+1, fit) b = Bookmark(self.currentPage, element.text, self.currentLevel+1, fit)
self.files.append((file, self.currentPage, rotate)) self.files.append((file, self.currentPage, filters))
self.currentPage += self._get_pdf_number_of_pages(file) self.currentPage += (end - start) + 1
if b: if b:
self.bookmarks.append(b) self.bookmarks.append(b)
@ -133,7 +142,7 @@ class Stitcher:
self.bookmarks = bookmarks self.bookmarks = bookmarks
""" """
Gets the last bookmkark level at a given page number Gets the last bookmark level at a given page number
on the combined PDF on the combined PDF
""" """
def _get_level_from_page_number(self, page): def _get_level_from_page_number(self, page):
@ -190,13 +199,14 @@ class Stitcher:
""" """
def _merge(self, output): def _merge(self, output):
writer = PdfFileWriter() writer = PdfFileWriter()
for (inputFile,startPage,rotate) in self.files: for (inputFile,startPage,filters) in self.files:
assert os.path.isfile(inputFile), ERROR_PATH.format(inputFile) assert os.path.isfile(inputFile), ERROR_PATH.format(inputFile)
reader = PdfFileReader(open(inputFile, 'rb')) reader = PdfFileReader(open(inputFile, 'rb'))
# Recursively iterate through the old bookmarks # Recursively iterate through the old bookmarks
self._iterate_old_bookmarks(reader, startPage, reader.getOutlines()) self._iterate_old_bookmarks(reader, startPage, reader.getOutlines())
for page in range(1, reader.getNumPages()+1): rotate, start, end = filters
writer.addPage(reader.getPage(page - 1).rotateClockwise(int(rotate))) for page in range(start, end + 1):
writer.addPage(reader.getPage(page - 1).rotateClockwise(rotate))
writer.write(output) writer.write(output)
output.close() output.close()

21
tests/book-page-select.md Normal file
View File

@ -0,0 +1,21 @@
existing_bookmarks: remove
author: Wiki, the Cat
subject: A book about adventures of Wiki, the cat.
keywords: wiki,potato,jelly
# Super Potato Book
# Volume 1
[Part 1](1.pdf){: start=1 end=2}
# Volume 2
[Part 2](2.pdf){: start=2}
# Volume 3
[Part 3](1.pdf){: end=2}
# Volume 4
[Part 4](2.pdf){: start=1 end=3 rotate="90"}