"upgrade" from PyPDF3 to pypdf

I picked the wrong fork (PyPDF3 instead of PyPDF2).
PyPDF2 was a fork of the original pyPdf.
After several years, that fork was merged back into pypdf (now all lowercase).
PyPDF3 is now unmaintained.

Meanwhile, pypdf has had a lot of interesting updates, which I should
look at.
This commit is contained in:
Nemo 2024-08-12 16:28:04 +05:30
parent 16e054fa4d
commit b22459f64c
6 changed files with 34 additions and 49 deletions

1
.gitignore vendored
View File

@ -22,6 +22,7 @@ __pycache__/*
.idea
.vscode
tags
src/pystitcher/_version.py
# Package files
*.egg

View File

@ -3,7 +3,7 @@
pystitcher stitches your PDF files together, generating nice
customizable bookmarks for you using a declarative input in the form of
a markdown file. It is written in pure python and uses
[PyPDF3](https://pypi.org/project/PyPDF3/) for reading and writing PDF
[pypdf](https://pypi.org/project/pypdf/) for reading and writing PDF
files.
## Installation

View File

@ -20,7 +20,7 @@ dependencies = [
"html5lib>=1.1",
"importlib-metadata; python_version<\"3.8\"",
"Markdown>=3.6",
"PyPDF3>=1.0.6",
"pypdf>=4.3.1",
"validators>=0.33.0",
]

View File

@ -1,16 +0,0 @@
# file generated by setuptools_scm
# don't change, don't track in version control
TYPE_CHECKING = False
if TYPE_CHECKING:
from typing import Tuple, Union
VERSION_TUPLE = Tuple[Union[int, str], ...]
else:
VERSION_TUPLE = object
version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
__version__ = version = '1.0.5.dev2+g9a37aa7.d20240812'
__version_tuple__ = version_tuple = (1, 0, 5, 'dev2', 'g9a37aa7.d20240812')

View File

@ -8,8 +8,8 @@ import validators
import html5lib
import markdown
from PyPDF3 import PdfFileWriter, PdfFileReader
from PyPDF3.generic import FloatObject
from pypdf import PdfWriter, PdfReader
from pypdf.generic import Fit
from pystitcher import __version__
from .bookmark import Bookmark
@ -70,8 +70,8 @@ class Stitcher:
def _get_pdf_number_of_pages(self, filename):
assert os.path.isfile(filename) and os.access(filename, os.R_OK), \
"File {} doesn't exist or isn't readable".format(filename)
pdf_reader = PdfFileReader(open(filename, "rb"))
return pdf_reader.numPages
pdf_reader = PdfReader(open(filename, "rb"))
return pdf_reader.get_num_pages()
"""
Return an attribute with a default value of None
@ -186,7 +186,7 @@ class Stitcher:
for inner_bookmark in bookmarks:
self._iterate_old_bookmarks(pdf, startPage, inner_bookmark, level+1)
else:
localPageNumber = pdf.getDestinationPageNumber(bookmarks)
localPageNumber = pdf.get_destination_page_number(bookmarks)
globalPageNumber = startPage + localPageNumber - 1
b = Bookmark(globalPageNumber, bookmarks.title, level, self.defaultFit)
self.oldBookmarks.append(b)
@ -198,9 +198,9 @@ class Stitcher:
"""
def _insert_bookmarks(self, old_filename, outputFilename):
stack = []
pdfInput = PdfFileReader(open(old_filename, 'rb'))
pdfOutput = PdfFileWriter()
pdfOutput.cloneDocumentFromReader(pdfInput)
pdfInput = PdfReader(open(old_filename, 'rb'))
pdfOutput = PdfWriter()
pdfOutput.clone_document_from_reader(pdfInput)
for b in self.bookmarks:
existingRef = None
# Trim the stack till the top is useful (stack.level < b.level)
@ -209,9 +209,9 @@ class Stitcher:
# If stack has something, use it
if (len(stack) > 0):
existingRef = stack[len(stack) - 1][1]
bookmargArgs = [b.title, b.page-1, existingRef, None, False, False, b.fit] + b.cords
stack.append((b, pdfOutput.addBookmark(*bookmargArgs)))
pdfOutput.addMetadata(self._getMetadata())
bookmargArgs = [b.title, b.page-1, existingRef, None, False, False, Fit(b.fit)] + b.cords
stack.append((b, pdfOutput.add_outline_item(*bookmargArgs)))
pdfOutput.add_metadata(self._getMetadata())
pdfOutput.write(open(outputFilename, 'wb'))
"""
@ -220,15 +220,15 @@ class Stitcher:
as we're reading them
"""
def _merge(self, output):
writer = PdfFileWriter()
writer = PdfWriter()
for (inputFile,startPage,filters) in self.files:
assert os.path.isfile(inputFile), ERROR_PATH.format(inputFile)
reader = PdfFileReader(open(inputFile, 'rb'))
reader = PdfReader(open(inputFile, 'rb'))
# Recursively iterate through the old bookmarks
self._iterate_old_bookmarks(reader, startPage, reader.getOutlines())
self._iterate_old_bookmarks(reader, startPage, reader.outline)
rotate, start, end = filters
for page in range(start, end + 1):
writer.addPage(reader.getPage(page - 1).rotateClockwise(rotate))
writer.add_page(reader.get_page(page - 1).rotate(rotate))
writer.write(output)
output.close()

View File

@ -1,7 +1,7 @@
import os
import io
import PyPDF3
import pypdf
from pystitcher.stitcher import Stitcher
from pystitcher import __version__
@ -55,16 +55,16 @@ def flatten_bookmarks(bookmarks, level=0):
def get_all_bookmarks(pdf):
""" Returns a list of all bookmarks with title, page number, and level in a PDF file"""
bookmarks = flatten_bookmarks(pdf.getOutlines())
return [(d[0]['/Title'], pdf.getDestinationPageNumber(d[0]), d[1]) for d in bookmarks]
bookmarks = flatten_bookmarks(pdf.outline)
return [(d[0]['/Title'], pdf.get_destination_page_number(d[0]), d[1]) for d in bookmarks]
@pytest.mark.parametrize("name,pages,metadata,bookmarks", TEST_DATA)
def test_book(name, pages, metadata, bookmarks):
output_file = render(name)
pdf = PyPDF3.PdfFileReader(output_file)
assert pages == pdf.getNumPages()
pdf = pypdf.PdfReader(output_file)
assert pages == pdf.get_num_pages()
assert bookmarks == get_all_bookmarks(pdf)
info = pdf.getDocumentInfo()
info = pdf.metadata
identity = "pystitcher/%s" % __version__
assert identity == info['/Producer']
assert identity == info['/Creator']
@ -74,14 +74,14 @@ def test_book(name, pages, metadata, bookmarks):
def test_rotation():
""" Validates the book-rotate.pdf with pages rotated."""
output_file = render("rotate")
pdf = PyPDF3.PdfFileReader(output_file)
pdf = pypdf.PdfReader(output_file)
# Note that inputs to getPage are 0-indexed
assert 90 == pdf.getPage(3)['/Rotate']
assert 90 == pdf.getPage(4)['/Rotate']
assert 90 == pdf.getPage(5)['/Rotate']
assert 180 == pdf.getPage(6)['/Rotate']
assert 180 == pdf.getPage(7)['/Rotate']
assert 180 == pdf.getPage(8)['/Rotate']
assert 90 == pdf.get_page(3)['/Rotate']
assert 90 == pdf.get_page(4)['/Rotate']
assert 90 == pdf.get_page(5)['/Rotate']
assert 180 == pdf.get_page(6)['/Rotate']
assert 180 == pdf.get_page(7)['/Rotate']
assert 180 == pdf.get_page(8)['/Rotate']
def test_cleanup_disabled():
f = io.StringIO()
@ -89,8 +89,8 @@ def test_cleanup_disabled():
output_file = render("min", False)
temp_filename = f.getvalue()[29:-1]
assert os.path.exists(temp_filename)
pdf = PyPDF3.PdfFileReader(temp_filename)
assert 3 == pdf.getNumPages()
assert [] == pdf.getOutlines()
pdf = pypdf.PdfReader(temp_filename)
assert 3 == pdf.get_num_pages()
assert [] == pdf.outline
# Clean it up manually to avoid cluttering
os.remove(temp_filename)