mirror of
https://github.com/captn3m0/pystitcher.git
synced 2024-09-10 13:46:45 +00:00
"upgrade" from PyPDF3 to pypdf
I picked the wrong fork (PyPDF3 instead of PyPDF2). PyPDF2 was a fork of the original pyPdf. After several years, that fork was merged back into pypdf (now all lowercase). PyPDF3 is now unmaintained. Meanwhile, pypdf has had a lot of interesting updates, which I should look at.
This commit is contained in:
parent
16e054fa4d
commit
b22459f64c
1
.gitignore
vendored
1
.gitignore
vendored
@ -22,6 +22,7 @@ __pycache__/*
|
||||
.idea
|
||||
.vscode
|
||||
tags
|
||||
src/pystitcher/_version.py
|
||||
|
||||
# Package files
|
||||
*.egg
|
||||
|
@ -3,7 +3,7 @@
|
||||
pystitcher stitches your PDF files together, generating nice
|
||||
customizable bookmarks for you using a declarative input in the form of
|
||||
a Markdown file. It is written in pure Python and uses
|
||||
[PyPDF3](https://pypi.org/project/PyPDF3/) for reading and writing PDF
|
||||
[pypdf](https://pypi.org/project/pypdf/) for reading and writing PDF
|
||||
files.
|
||||
|
||||
## Installation
|
||||
|
@ -20,7 +20,7 @@ dependencies = [
|
||||
"html5lib>=1.1",
|
||||
"importlib-metadata; python_version<\"3.8\"",
|
||||
"Markdown>=3.6",
|
||||
"PyPDF3>=1.0.6",
|
||||
"pypdf>=4.3.1",
|
||||
"validators>=0.33.0",
|
||||
]
|
||||
|
||||
|
@ -1,16 +0,0 @@
|
||||
# file generated by setuptools_scm
|
||||
# don't change, don't track in version control
|
||||
TYPE_CHECKING = False
|
||||
if TYPE_CHECKING:
|
||||
from typing import Tuple, Union
|
||||
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
||||
else:
|
||||
VERSION_TUPLE = object
|
||||
|
||||
version: str
|
||||
__version__: str
|
||||
__version_tuple__: VERSION_TUPLE
|
||||
version_tuple: VERSION_TUPLE
|
||||
|
||||
__version__ = version = '1.0.5.dev2+g9a37aa7.d20240812'
|
||||
__version_tuple__ = version_tuple = (1, 0, 5, 'dev2', 'g9a37aa7.d20240812')
|
@ -8,8 +8,8 @@ import validators
|
||||
import html5lib
|
||||
import markdown
|
||||
|
||||
from PyPDF3 import PdfFileWriter, PdfFileReader
|
||||
from PyPDF3.generic import FloatObject
|
||||
from pypdf import PdfWriter, PdfReader
|
||||
from pypdf.generic import Fit
|
||||
from pystitcher import __version__
|
||||
from .bookmark import Bookmark
|
||||
|
||||
@ -70,8 +70,8 @@ class Stitcher:
|
||||
def _get_pdf_number_of_pages(self, filename):
|
||||
assert os.path.isfile(filename) and os.access(filename, os.R_OK), \
|
||||
"File {} doesn't exist or isn't readable".format(filename)
|
||||
pdf_reader = PdfFileReader(open(filename, "rb"))
|
||||
return pdf_reader.numPages
|
||||
pdf_reader = PdfReader(open(filename, "rb"))
|
||||
return pdf_reader.get_num_pages()
|
||||
|
||||
"""
|
||||
Return an attribute with a default value of None
|
||||
@ -186,7 +186,7 @@ class Stitcher:
|
||||
for inner_bookmark in bookmarks:
|
||||
self._iterate_old_bookmarks(pdf, startPage, inner_bookmark, level+1)
|
||||
else:
|
||||
localPageNumber = pdf.getDestinationPageNumber(bookmarks)
|
||||
localPageNumber = pdf.get_destination_page_number(bookmarks)
|
||||
globalPageNumber = startPage + localPageNumber - 1
|
||||
b = Bookmark(globalPageNumber, bookmarks.title, level, self.defaultFit)
|
||||
self.oldBookmarks.append(b)
|
||||
@ -198,9 +198,9 @@ class Stitcher:
|
||||
"""
|
||||
def _insert_bookmarks(self, old_filename, outputFilename):
|
||||
stack = []
|
||||
pdfInput = PdfFileReader(open(old_filename, 'rb'))
|
||||
pdfOutput = PdfFileWriter()
|
||||
pdfOutput.cloneDocumentFromReader(pdfInput)
|
||||
pdfInput = PdfReader(open(old_filename, 'rb'))
|
||||
pdfOutput = PdfWriter()
|
||||
pdfOutput.clone_document_from_reader(pdfInput)
|
||||
for b in self.bookmarks:
|
||||
existingRef = None
|
||||
# Trim the stack till the top is useful (stack.level < b.level)
|
||||
@ -209,9 +209,9 @@ class Stitcher:
|
||||
# If stack has something, use it
|
||||
if (len(stack) > 0):
|
||||
existingRef = stack[len(stack) - 1][1]
|
||||
bookmargArgs = [b.title, b.page-1, existingRef, None, False, False, b.fit] + b.cords
|
||||
stack.append((b, pdfOutput.addBookmark(*bookmargArgs)))
|
||||
pdfOutput.addMetadata(self._getMetadata())
|
||||
bookmargArgs = [b.title, b.page-1, existingRef, None, False, False, Fit(b.fit)] + b.cords
|
||||
stack.append((b, pdfOutput.add_outline_item(*bookmargArgs)))
|
||||
pdfOutput.add_metadata(self._getMetadata())
|
||||
pdfOutput.write(open(outputFilename, 'wb'))
|
||||
|
||||
"""
|
||||
@ -220,15 +220,15 @@ class Stitcher:
|
||||
as we're reading them
|
||||
"""
|
||||
def _merge(self, output):
|
||||
writer = PdfFileWriter()
|
||||
writer = PdfWriter()
|
||||
for (inputFile,startPage,filters) in self.files:
|
||||
assert os.path.isfile(inputFile), ERROR_PATH.format(inputFile)
|
||||
reader = PdfFileReader(open(inputFile, 'rb'))
|
||||
reader = PdfReader(open(inputFile, 'rb'))
|
||||
# Recursively iterate through the old bookmarks
|
||||
self._iterate_old_bookmarks(reader, startPage, reader.getOutlines())
|
||||
self._iterate_old_bookmarks(reader, startPage, reader.outline)
|
||||
rotate, start, end = filters
|
||||
for page in range(start, end + 1):
|
||||
writer.addPage(reader.getPage(page - 1).rotateClockwise(rotate))
|
||||
writer.add_page(reader.get_page(page - 1).rotate(rotate))
|
||||
|
||||
writer.write(output)
|
||||
output.close()
|
||||
|
@ -1,7 +1,7 @@
|
||||
import os
|
||||
import io
|
||||
|
||||
import PyPDF3
|
||||
import pypdf
|
||||
from pystitcher.stitcher import Stitcher
|
||||
from pystitcher import __version__
|
||||
|
||||
@ -55,16 +55,16 @@ def flatten_bookmarks(bookmarks, level=0):
|
||||
|
||||
def get_all_bookmarks(pdf):
|
||||
""" Returns a list of all bookmarks with title, page number, and level in a PDF file"""
|
||||
bookmarks = flatten_bookmarks(pdf.getOutlines())
|
||||
return [(d[0]['/Title'], pdf.getDestinationPageNumber(d[0]), d[1]) for d in bookmarks]
|
||||
bookmarks = flatten_bookmarks(pdf.outline)
|
||||
return [(d[0]['/Title'], pdf.get_destination_page_number(d[0]), d[1]) for d in bookmarks]
|
||||
|
||||
@pytest.mark.parametrize("name,pages,metadata,bookmarks", TEST_DATA)
|
||||
def test_book(name, pages, metadata, bookmarks):
|
||||
output_file = render(name)
|
||||
pdf = PyPDF3.PdfFileReader(output_file)
|
||||
assert pages == pdf.getNumPages()
|
||||
pdf = pypdf.PdfReader(output_file)
|
||||
assert pages == pdf.get_num_pages()
|
||||
assert bookmarks == get_all_bookmarks(pdf)
|
||||
info = pdf.getDocumentInfo()
|
||||
info = pdf.metadata
|
||||
identity = "pystitcher/%s" % __version__
|
||||
assert identity == info['/Producer']
|
||||
assert identity == info['/Creator']
|
||||
@ -74,14 +74,14 @@ def test_book(name, pages, metadata, bookmarks):
|
||||
def test_rotation():
|
||||
""" Validates the book-rotate.pdf with pages rotated."""
|
||||
output_file = render("rotate")
|
||||
pdf = PyPDF3.PdfFileReader(output_file)
|
||||
pdf = pypdf.PdfReader(output_file)
|
||||
# Note that inputs to get_page are 0-indexed
|
||||
assert 90 == pdf.getPage(3)['/Rotate']
|
||||
assert 90 == pdf.getPage(4)['/Rotate']
|
||||
assert 90 == pdf.getPage(5)['/Rotate']
|
||||
assert 180 == pdf.getPage(6)['/Rotate']
|
||||
assert 180 == pdf.getPage(7)['/Rotate']
|
||||
assert 180 == pdf.getPage(8)['/Rotate']
|
||||
assert 90 == pdf.get_page(3)['/Rotate']
|
||||
assert 90 == pdf.get_page(4)['/Rotate']
|
||||
assert 90 == pdf.get_page(5)['/Rotate']
|
||||
assert 180 == pdf.get_page(6)['/Rotate']
|
||||
assert 180 == pdf.get_page(7)['/Rotate']
|
||||
assert 180 == pdf.get_page(8)['/Rotate']
|
||||
|
||||
def test_cleanup_disabled():
|
||||
f = io.StringIO()
|
||||
@ -89,8 +89,8 @@ def test_cleanup_disabled():
|
||||
output_file = render("min", False)
|
||||
temp_filename = f.getvalue()[29:-1]
|
||||
assert os.path.exists(temp_filename)
|
||||
pdf = PyPDF3.PdfFileReader(temp_filename)
|
||||
assert 3 == pdf.getNumPages()
|
||||
assert [] == pdf.getOutlines()
|
||||
pdf = pypdf.PdfReader(temp_filename)
|
||||
assert 3 == pdf.get_num_pages()
|
||||
assert [] == pdf.outline
|
||||
# Clean it up manually to avoid cluttering
|
||||
os.remove(temp_filename)
|
||||
|
Loading…
Reference in New Issue
Block a user