Functionally running, but only for me
This commit is contained in:
parent
c2914dd000
commit
b4449daa50
|
@ -0,0 +1,19 @@
|
||||||
|
# EditorConfig is awesome: http://EditorConfig.org
|
||||||
|
|
||||||
|
# top-most EditorConfig file
|
||||||
|
root = true
|
||||||
|
|
||||||
|
# Unix-style newlines with a newline ending every file
|
||||||
|
[*]
|
||||||
|
end_of_line = lf
|
||||||
|
insert_final_newline = true
|
||||||
|
|
||||||
|
# 4 space indentation
|
||||||
|
[*.py]
|
||||||
|
indent_style = space
|
||||||
|
indent_size = 4
|
||||||
|
|
||||||
|
# Matches exactly the files package.json and .travis.yml
|
||||||
|
[{package.json,.travis.yml}]
|
||||||
|
indent_style = space
|
||||||
|
indent_size = 2
|
|
@ -67,8 +67,8 @@ testing =
|
||||||
# console_scripts =
|
# console_scripts =
|
||||||
# script_name = pystitcher.module:function
|
# script_name = pystitcher.module:function
|
||||||
# For example:
|
# For example:
|
||||||
# console_scripts =
|
console_scripts =
|
||||||
# fibonacci = pystitcher.skeleton:run
|
pystitcher = pystitcher.skeleton:run
|
||||||
# And any other entry points, for example:
|
# And any other entry points, for example:
|
||||||
# pyscaffold.cli =
|
# pyscaffold.cli =
|
||||||
# awesome = pyscaffoldext.awesome.extension:AwesomeExtension
|
# awesome = pyscaffoldext.awesome.extension:AwesomeExtension
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
""" Bookmark class """
|
||||||
|
class Bookmark:
    """One outline (bookmark) entry: a title attached to a page at a nesting level.

    Instances order themselves by ``page`` only, so a list of bookmarks can be
    put into page order with ``list.sort()`` / ``sorted()``. Because those sorts
    are stable, entries that share a page keep their insertion order.

    Attributes:
      page: page number the bookmark points at.
      title: text shown for the bookmark.
      level: nesting depth of the bookmark (defaults to 1).
    """

    def __init__(self, page, title, level=1):
        self.page = page
        self.title = title
        self.level = level

    def __lt__(self, other):
        """Return True when this bookmark points at an earlier page than ``other``.

        Any object exposing a ``page`` attribute is accepted. For anything else
        ``NotImplemented`` is returned so Python raises its regular ``TypeError``
        for unsupported comparisons instead of leaking an ``AttributeError``.
        """
        try:
            other_page = other.page
        except AttributeError:
            return NotImplemented
        return self.page < other_page

    def __repr__(self):
        """Return the bookmark rendered as the list ``[page, title, level]``."""
        return str([self.page, self.title, self.level])
|
|
@ -1,29 +1,14 @@
|
||||||
"""
|
"""
|
||||||
This is a skeleton file that can serve as a starting point for a Python
|
This is the entry script
|
||||||
console script. To run this script uncomment the following lines in the
|
|
||||||
``[options.entry_points]`` section in ``setup.cfg``::
|
|
||||||
|
|
||||||
console_scripts =
|
|
||||||
fibonacci = pystitcher.skeleton:run
|
|
||||||
|
|
||||||
Then run ``pip install .`` (or ``pip install -e .`` for editable mode)
|
|
||||||
which will install the command ``fibonacci`` inside your current environment.
|
|
||||||
|
|
||||||
Besides console scripts, the header (i.e. until ``_logger``...) of this file can
|
|
||||||
also be used as template for Python modules.
|
|
||||||
|
|
||||||
Note:
|
|
||||||
This skeleton file can be safely removed if not needed!
|
|
||||||
|
|
||||||
References:
|
References:
|
||||||
- https://setuptools.readthedocs.io/en/latest/userguide/entry_point.html
|
- https://setuptools.readthedocs.io/en/latest/userguide/entry_point.html
|
||||||
- https://pip.pypa.io/en/stable/reference/pip_install
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
|
from .stitcher import Stitcher
|
||||||
from pystitcher import __version__
|
from pystitcher import __version__
|
||||||
|
|
||||||
__author__ = "Nemo"
|
__author__ = "Nemo"
|
||||||
|
@ -33,29 +18,6 @@ __license__ = "MIT"
|
||||||
_logger = logging.getLogger(__name__)
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# ---- Python API ----
|
|
||||||
# The functions defined in this section can be imported by users in their
|
|
||||||
# Python scripts/interactive interpreter, e.g. via
|
|
||||||
# `from pystitcher.skeleton import fib`,
|
|
||||||
# when using this Python module as a library.
|
|
||||||
|
|
||||||
|
|
||||||
def fib(n):
|
|
||||||
"""Fibonacci example function
|
|
||||||
|
|
||||||
Args:
|
|
||||||
n (int): integer
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
int: n-th Fibonacci number
|
|
||||||
"""
|
|
||||||
assert n > 0
|
|
||||||
a, b = 1, 1
|
|
||||||
for i in range(n - 1):
|
|
||||||
a, b = b, a + b
|
|
||||||
return a
|
|
||||||
|
|
||||||
|
|
||||||
# ---- CLI ----
|
# ---- CLI ----
|
||||||
# The functions defined in this section are wrappers around the main Python
|
# The functions defined in this section are wrappers around the main Python
|
||||||
# API allowing them to be called directly from the terminal as a CLI
|
# API allowing them to be called directly from the terminal as a CLI
|
||||||
|
@ -72,28 +34,28 @@ def parse_args(args):
|
||||||
Returns:
|
Returns:
|
||||||
:obj:`argparse.Namespace`: command line parameters namespace
|
:obj:`argparse.Namespace`: command line parameters namespace
|
||||||
"""
|
"""
|
||||||
parser = argparse.ArgumentParser(description="Just a Fibonacci demonstration")
|
parser = argparse.ArgumentParser(description="Stitch PDF files together")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--version",
|
"--version",
|
||||||
action="version",
|
action="version",
|
||||||
version="pystitcher {ver}".format(ver=__version__),
|
version="pystitcher {ver}".format(ver=__version__),
|
||||||
)
|
)
|
||||||
parser.add_argument(dest="n", help="n-th Fibonacci number", type=int, metavar="INT")
|
parser.add_argument(dest="input", help="Input Spine markdown file", type=argparse.FileType('r', encoding='UTF-8'), metavar="spine.md")
|
||||||
|
parser.add_argument(dest="output", help="Output PDF file", type=str, metavar="output.pdf")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-v",
|
"-v",
|
||||||
"--verbose",
|
"--verbose",
|
||||||
dest="loglevel",
|
dest="loglevel",
|
||||||
help="set loglevel to INFO",
|
help="log more things",
|
||||||
action="store_const",
|
action="store_const",
|
||||||
const=logging.INFO,
|
const=logging.INFO,
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-vv",
|
"--no-cleanup",
|
||||||
"--very-verbose",
|
dest="no_cleanup",
|
||||||
dest="loglevel",
|
help="log more things",
|
||||||
help="set loglevel to DEBUG",
|
action=argparse.BooleanOptionalAction,
|
||||||
action="store_const",
|
|
||||||
const=logging.DEBUG,
|
|
||||||
)
|
)
|
||||||
return parser.parse_args(args)
|
return parser.parse_args(args)
|
||||||
|
|
||||||
|
@ -111,26 +73,16 @@ def setup_logging(loglevel):
|
||||||
|
|
||||||
|
|
||||||
def main(args):
    """Main CLI function: parse the arguments and stitch the PDF together

    Args:
      args (List[str]): command line parameters as list of strings
          (for example ``["--verbose", "spine.md", "output.pdf"]``).
    """
    args = parse_args(args)
    setup_logging(args.loglevel)
    # The spine file is opened by argparse; the Stitcher reads it completely
    # in its constructor, so the handle can be released right afterwards.
    with args.input as spine:
        stitcher = Stitcher(spine)
    # Temporary files are removed unless --no-cleanup was given.
    stitcher.generate(args.output, not args.no_cleanup)
    # Logged last so the message really marks the end of the run.
    _logger.info("Script ends here")
|
||||||
|
|
||||||
def run():
    """Console-script entry point

    Hands every command line argument after the program name
    (``sys.argv[1:]``) over to :func:`main`.
    """
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,132 @@
|
||||||
|
import os
|
||||||
|
import markdown
|
||||||
|
from .bookmark import Bookmark
|
||||||
|
import html5lib
|
||||||
|
from PyPDF2 import PdfFileWriter, PdfFileReader
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
|
||||||
|
""" Main Stitcher class """
|
||||||
|
class Stitcher:
    """Stitch the PDF files listed in a Markdown "spine" into one bookmarked PDF.

    The spine is rendered to HTML and walked element by element:

    * ``h1``/``h2``/``h3`` headings become bookmarks of level 1/2/3 pointing at
      the page where the next linked file will start; the first ``h1`` also
      becomes the document title.
    * every link (``a`` with an ``href``) names a PDF file to append; it gets a
      bookmark one level below the most recent heading.

    ``generate`` then merges the files, writes a metadata file with the title
    and all bookmarks, and lets an external tool apply that metadata.

    Note: the constructor changes the process working directory to the
    directory of the spine file, so relative links in the spine (and every
    other relative path used afterwards) resolve against that directory.
    """

    # Heading tags that produce a bookmark, mapped to their bookmark level.
    _HEADING_LEVELS = {'h1': 1, 'h2': 2, 'h3': 3}

    # Jar handed to ``java -jar`` by ``_update_metadata``; a relative name is
    # looked up in the current working directory (the spine's directory).
    PDFTK_JAR = 'PDFtkBox.jar'

    def __init__(self, inputBuffer):
        """Parse the spine and collect the files and bookmarks it describes.

        Args:
          inputBuffer: open text file object of the spine; must provide
              ``name`` and ``read()``. It is read completely here.
        """
        self.files = []           # linked PDF files, in spine order
        self.currentPage = 1      # 1-based page where the next file starts
        self.title = None         # text of the first h1, if any
        self.bookmarks = []       # bookmarks created from the spine
        self.currentLevel = None  # level of the most recent heading
        self.oldBookmarks = []    # bookmarks found inside the merged PDF
        self.dir = os.path.dirname(os.path.abspath(inputBuffer.name))
        os.chdir(self.dir)

        text = inputBuffer.read()
        html = markdown.markdown(text, extensions=['attr_list'])
        document = html5lib.parseFragment(html, namespaceHTMLElements=False)
        for element in document.iter():
            self.iter(element)

    @staticmethod
    def _element_title(element):
        """Return the full text of ``element`` including nested markup.

        ``element.text`` alone is ``None`` when the element starts with a child
        (e.g. a heading that wraps a link), which would later break writing the
        bookmark title.
        """
        return "".join(element.itertext())

    def _get_pdf_number_of_pages(self, filename):
        """Return the number of pages of the PDF file ``filename``.

        Raises:
          FileNotFoundError: if the file does not exist or cannot be read.
        """
        if not (os.path.isfile(filename) and os.access(filename, os.R_OK)):
            raise FileNotFoundError(
                "File {} doesn't exist or isn't readable".format(filename))
        # The page count is read while the file is still open.
        with open(filename, "rb") as handle:
            return PdfFileReader(handle).numPages

    def iter(self, element):
        """Process one element of the rendered spine.

        Headings (h1-h3) add a bookmark at the current page and set the current
        level. Links add a bookmark one level deeper, register the linked file
        and advance the current page by that file's page count. Every other
        element, and links without an ``href``, are ignored.
        """
        tag = element.tag
        bookmark = None
        heading_level = self._HEADING_LEVELS.get(tag)
        if heading_level is not None:
            title = self._element_title(element)
            if tag == 'h1' and self.title is None:
                self.title = title
            bookmark = Bookmark(self.currentPage, title, heading_level)
            self.currentLevel = heading_level
        elif tag == 'a':
            href = element.attrib.get('href')
            if href is None:
                # An anchor without a target names no file to stitch.
                return
            # A link that appears before any heading becomes a top-level entry.
            parent_level = self.currentLevel or 0
            bookmark = Bookmark(self.currentPage,
                                self._element_title(element),
                                parent_level + 1)
            self.currentPage += self._get_pdf_number_of_pages(href)
            self.files.append(href)
        if bookmark is not None:
            self.bookmarks.append(bookmark)

    def _add_bookmark(self, targetFileHandle, title, level, page):
        """Write one bookmark record to the metadata file ``targetFileHandle``."""
        targetFileHandle.write(
            "BookmarkBegin\n"
            "BookmarkTitle: {}\n"
            "BookmarkLevel: {}\n"
            "BookmarkPageNumber: {}\n"
            "BookmarkZoom: FitHeight\n".format(title, level, page)
        )

    def _generate_metadata(self, filename, flatten_inner_bookmarks=True):
        """Write the title and all bookmarks to the metadata file ``filename``.

        Bookmarks found inside the merged PDF (``self.oldBookmarks``) are added
        to ``self.bookmarks`` below the spine bookmark that encloses their page.

        Args:
          filename: path of the metadata file to create.
          flatten_inner_bookmarks: when True every inner bookmark sits exactly
              one level below its enclosing spine bookmark; otherwise its own
              level is added to the enclosing level.
        """
        # Resolve all inner bookmarks before touching self.bookmarks, so the
        # level lookup only ever sees the bookmarks that came from the spine.
        inner_bookmarks = []
        for old in self.oldBookmarks:
            outer_level = self._get_level_from_page_number(old.page)
            increment = 1 if flatten_inner_bookmarks else old.level
            # Old bookmarks carry 0-based pages; the metadata is 1-based.
            inner_bookmarks.append(
                Bookmark(old.page + 1, old.title, outer_level + increment))

        self.bookmarks.extend(inner_bookmarks)
        # Stable sort: on the same page spine bookmarks stay ahead of inner ones.
        self.bookmarks.sort()

        with open(filename, 'w') as target:
            if self.title:
                target.write("InfoBegin\n")
                target.write("InfoKey: Title\n")
                target.write("InfoValue: " + self.title + "\n")

            for bookmark in self.bookmarks:
                self._add_bookmark(
                    target, bookmark.title, bookmark.level, bookmark.page)

    def _generate_concat_command(self, temp_filename):
        """Return the pdftk command line that concatenates all files."""
        return ["pdftk"] + self.files + ['cat', 'output', temp_filename]

    def _generate_temp_pdf(self, temp_filename):
        """Merge all files into ``temp_filename`` and collect its bookmarks."""
        self._merge(self.files, temp_filename)
        self._parse_old_bookmarks(temp_filename)

    def _get_level_from_page_number(self, page):
        """Return the level of the bookmark that encloses ``page``.

        Args:
          page: 0-based page index, as stored on the old bookmarks.

        Returns:
          The level of the last entry of ``self.bookmarks`` (whose pages are
          1-based) located at or before that page, or 0 when there is none.
        """
        level = 0
        for bookmark in self.bookmarks:
            if bookmark.page <= page + 1:
                level = bookmark.level
        return level

    def _iterate_old_bookmarks(self, pdf, bookmarks, level = 1):
        """Recursively collect the outline entries of ``pdf``.

        Args:
          pdf: reader the outline belongs to.
          bookmarks: an outline entry, or a (nested) list of outline entries.
          level: nesting depth of ``bookmarks``; each list adds one level.
        """
        if isinstance(bookmarks, list):
            for inner_bookmark in bookmarks:
                self._iterate_old_bookmarks(pdf, inner_bookmark, level + 1)
            return

        pageNumber = pdf.getDestinationPageNumber(bookmarks)
        if pageNumber < 0:
            # NOTE(review): PyPDF2 is expected to report -1 for a destination
            # it cannot map to a page; such an entry has nowhere to point.
            return
        self.oldBookmarks.append(Bookmark(pageNumber, bookmarks.title, level))

    def _parse_old_bookmarks(self, filename):
        """Read the outline of the PDF ``filename`` into ``self.oldBookmarks``."""
        # Keep the file open while the outline is being resolved.
        with open(filename, "rb") as handle:
            reader = PdfFileReader(handle)
            self._iterate_old_bookmarks(reader, reader.getOutlines())

    def _update_metadata(self, old_filename, metadata_file, outputBuffer):
        """Apply ``metadata_file`` to ``old_filename`` and write ``outputBuffer``.

        Raises:
          subprocess.CalledProcessError: if the external tool exits non-zero.
        """
        subprocess.run(
            ['java', '-jar', self.PDFTK_JAR, old_filename,
             "update_info", metadata_file, 'output', outputBuffer],
            check=True,
        )

    def _merge(self, paths, output):
        """Concatenate the pages of all PDFs in ``paths`` into ``output``.

        Raises:
          FileNotFoundError: if one of the input files does not exist.
        """
        paths = list(paths)
        # Validate everything up front so nothing is written for a bad spine.
        for inputFile in paths:
            if not os.path.isfile(inputFile):
                raise FileNotFoundError(
                    "File {} doesn't exist".format(inputFile))

        writer = PdfFileWriter()
        handles = []
        try:
            for inputFile in paths:
                # The inputs stay open until the writer has written its
                # output; they are all closed in the finally block.
                handle = open(inputFile, 'rb')
                handles.append(handle)
                reader = PdfFileReader(handle)
                for index in range(reader.getNumPages()):
                    writer.addPage(reader.getPage(index))

            with open(output, 'wb') as stream:
                writer.write(stream)
        finally:
            for handle in handles:
                handle.close()

    def generate(self, outputBuffer, delete_temp_files = False):
        """Create the stitched, bookmarked PDF.

        Args:
          outputBuffer: path of the PDF to create.
          delete_temp_files: remove the intermediate files afterwards, also
              when one of the steps failed.
        """
        METADATA_FILENAME = 'metadata.txt'
        TEMP_PDF_FILENAME = 'temp.pdf'

        try:
            self._generate_temp_pdf(TEMP_PDF_FILENAME)
            self._generate_metadata(METADATA_FILENAME)
            self._update_metadata(
                TEMP_PDF_FILENAME, METADATA_FILENAME, outputBuffer)
        finally:
            if delete_temp_files:
                for leftover in (METADATA_FILENAME, TEMP_PDF_FILENAME):
                    # A failed step may not have produced its file yet.
                    if os.path.exists(leftover):
                        os.remove(leftover)
|
Loading…
Reference in New Issue