Merge pull request #10 from Vonter/feature/page_filter
Add PDF page selection/filter
This commit is contained in:
commit
1324c2e4aa
23
README.rst
23
README.rst
|
@ -88,18 +88,27 @@ Configuration options can be specified with Meta data at the top of the file.
|
||||||
| | for more details. |
|
| | for more details. |
|
||||||
+---------------------+--------------------------------------------------------------------------+
|
+---------------------+--------------------------------------------------------------------------+
|
||||||
|
|
||||||
Additionally, PDF links specified in markdown can have attributes to alter the PDFs before merging::
|
Additionally, PDF links specified in markdown can have attributes to alter the PDFs before merging. The below attribute will rotate the second PDF file by 90 degrees clockwise before merging::
|
||||||
|
|
||||||
[Part 1](1.pdf)
|
[Part 1](1.pdf)
|
||||||
[Part 2](2.pdf){: rotate="90"}
|
[Part 2](2.pdf){: rotate="90"}
|
||||||
|
|
||||||
The above will rotate the second PDF file by 90 degrees clockwise before merging. List of attributes:
|
The attributes below merge only pages 2 through 5 (inclusive) of the second PDF file::
|
||||||
|
|
||||||
+---------------------+---------------------------------------------+
|
[Part 1](1.pdf)
|
||||||
| Attribute | Notes |
|
[Part 2](2.pdf){: start=2 end=5}
|
||||||
+=====================+=============================================+
|
|
||||||
| rotate | Rotate the PDF. Valid values are 90,180,270 |
|
The available attributes are:
|
||||||
+---------------------+---------------------------------------------+
|
|
||||||
|
List of attributes:
|
||||||
|
|
||||||
|
+---------------------+-----------------------------------------------+
|
||||||
|
| Attribute | Notes |
|
||||||
|
+=====================+===============================================+
|
||||||
|
| rotate | Rotate the PDF. Valid values are 90, 180, 270 |
|
||||||
|
| start | Start page number for PDF page selection |
|
||||||
|
| end | End page number for PDF page selection |
|
||||||
|
+---------------------+-----------------------------------------------+
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
=============
|
=============
|
||||||
|
|
|
@ -24,6 +24,10 @@ class Stitcher:
|
||||||
DEFAULT_FIT = '/FitV'
|
DEFAULT_FIT = '/FitV'
|
||||||
# Do not rotate by default
|
# Do not rotate by default
|
||||||
DEFAULT_ROTATE = 0
|
DEFAULT_ROTATE = 0
|
||||||
|
# Start at page 1 by default
|
||||||
|
DEFAULT_START = 1
|
||||||
|
# End at the final page by default
|
||||||
|
DEFAULT_END = None
|
||||||
|
|
||||||
# TODO: This is a hack
|
# TODO: This is a hack
|
||||||
os.chdir(self.dir)
|
os.chdir(self.dir)
|
||||||
|
@ -34,6 +38,8 @@ class Stitcher:
|
||||||
self.attributes = md.Meta
|
self.attributes = md.Meta
|
||||||
self.defaultFit = self._getAttribute('fit', DEFAULT_FIT)
|
self.defaultFit = self._getAttribute('fit', DEFAULT_FIT)
|
||||||
self.defaultRotate = self._getAttribute('rotate', DEFAULT_ROTATE)
|
self.defaultRotate = self._getAttribute('rotate', DEFAULT_ROTATE)
|
||||||
|
self.defaultStart = self._getAttribute('start', DEFAULT_START)
|
||||||
|
self.defaultEnd = self._getAttribute('end', DEFAULT_END)
|
||||||
|
|
||||||
document = html5lib.parseFragment(html, namespaceHTMLElements=False)
|
document = html5lib.parseFragment(html, namespaceHTMLElements=False)
|
||||||
for e in document.iter():
|
for e in document.iter():
|
||||||
|
@ -92,11 +98,14 @@ class Stitcher:
|
||||||
self.currentLevel = 3
|
self.currentLevel = 3
|
||||||
elif(tag =='a'):
|
elif(tag =='a'):
|
||||||
file = element.attrib.get('href')
|
file = element.attrib.get('href')
|
||||||
rotate = element.attrib.get('rotate', self.defaultRotate)
|
|
||||||
fit = element.attrib.get('fit', self.defaultFit)
|
fit = element.attrib.get('fit', self.defaultFit)
|
||||||
|
rotate = int(element.attrib.get('rotate', self.defaultRotate))
|
||||||
|
start = int(element.attrib.get('start', self.defaultStart))
|
||||||
|
end = int(element.attrib.get('end', self._get_pdf_number_of_pages(file) if self.defaultEnd is None else self.defaultEnd))
|
||||||
|
filters = (rotate, start, end)
|
||||||
b = Bookmark(self.currentPage, element.text, self.currentLevel+1, fit)
|
b = Bookmark(self.currentPage, element.text, self.currentLevel+1, fit)
|
||||||
self.files.append((file, self.currentPage, rotate))
|
self.files.append((file, self.currentPage, filters))
|
||||||
self.currentPage += self._get_pdf_number_of_pages(file)
|
self.currentPage += (end - start) + 1
|
||||||
if b:
|
if b:
|
||||||
self.bookmarks.append(b)
|
self.bookmarks.append(b)
|
||||||
|
|
||||||
|
@ -133,7 +142,7 @@ class Stitcher:
|
||||||
self.bookmarks = bookmarks
|
self.bookmarks = bookmarks
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Gets the last bookmkark level at a given page number
|
Gets the last bookmark level at a given page number
|
||||||
on the combined PDF
|
on the combined PDF
|
||||||
"""
|
"""
|
||||||
def _get_level_from_page_number(self, page):
|
def _get_level_from_page_number(self, page):
|
||||||
|
@ -190,13 +199,14 @@ class Stitcher:
|
||||||
"""
|
"""
|
||||||
def _merge(self, output):
|
def _merge(self, output):
|
||||||
writer = PdfFileWriter()
|
writer = PdfFileWriter()
|
||||||
for (inputFile,startPage,rotate) in self.files:
|
for (inputFile,startPage,filters) in self.files:
|
||||||
assert os.path.isfile(inputFile), ERROR_PATH.format(inputFile)
|
assert os.path.isfile(inputFile), ERROR_PATH.format(inputFile)
|
||||||
reader = PdfFileReader(open(inputFile, 'rb'))
|
reader = PdfFileReader(open(inputFile, 'rb'))
|
||||||
# Recursively iterate through the old bookmarks
|
# Recursively iterate through the old bookmarks
|
||||||
self._iterate_old_bookmarks(reader, startPage, reader.getOutlines())
|
self._iterate_old_bookmarks(reader, startPage, reader.getOutlines())
|
||||||
for page in range(1, reader.getNumPages()+1):
|
rotate, start, end = filters
|
||||||
writer.addPage(reader.getPage(page - 1).rotateClockwise(int(rotate)))
|
for page in range(start, end + 1):
|
||||||
|
writer.addPage(reader.getPage(page - 1).rotateClockwise(rotate))
|
||||||
|
|
||||||
writer.write(output)
|
writer.write(output)
|
||||||
output.close()
|
output.close()
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
existing_bookmarks: remove
|
||||||
|
author: Wiki, the Cat
|
||||||
|
subject: A book about adventures of Wiki, the cat.
|
||||||
|
keywords: wiki,potato,jelly
|
||||||
|
# Super Potato Book
|
||||||
|
|
||||||
|
# Volume 1
|
||||||
|
|
||||||
|
[Part 1](1.pdf){: start=1 end=2}
|
||||||
|
|
||||||
|
# Volume 2
|
||||||
|
|
||||||
|
[Part 2](2.pdf){: start=2}
|
||||||
|
|
||||||
|
# Volume 3
|
||||||
|
|
||||||
|
[Part 3](1.pdf){: end=2}
|
||||||
|
|
||||||
|
# Volume 4
|
||||||
|
|
||||||
|
[Part 4](2.pdf){: start=1 end=3 rotate="90"}
|
Loading…
Reference in New Issue