Add external URL fetching of PDFs

Also changed import order according to PEP8
2021-06-27 17:33:49 +05:30 · 2021-06-27 17:33:49 +05:30 · 31faa1a36c
parent ebc9c1e0cf
commit 31faa1a36c
2 changed files with 44 additions and 5 deletions
--- a/src/pystitcher/stitcher.py
+++ b/src/pystitcher/stitcher.py
@ -1,12 +1,17 @@
 import os
-import markdown
+import logging
-from .bookmark import Bookmark
+import shutil
 import tempfile
 import urllib.request
 import validators
 import html5lib
 import markdown
 from PyPDF3 import PdfFileWriter, PdfFileReader
 from PyPDF3.generic import FloatObject
 from pystitcher import __version__
-import tempfile
+from .bookmark import Bookmark
 import logging
 _logger = logging.getLogger(__name__)
@ -45,6 +50,20 @@ class Stitcher:
        for e in document.iter():
            self.iter(e)
    """
    Check if file has been cached locally and if
    not cached, download from provided URL. Return
    download filename
    """
    def _cacheURL(self, url):
        """
        Return the local filename for the PDF at `url`, downloading it first
        if it is not already present in the current working directory.

        The local filename is the last path component of `url`
        (`os.path.basename(url)`), so the file is cached relative to the
        current working directory.

        Errors raised by `urllib.request.urlopen` or by the file operations
        propagate to the caller; no partially downloaded file is left behind
        under the cache filename.
        """
        localFile = os.path.basename(url)

        if os.path.exists(localFile):
            # Report the local path: that is where the cached copy lives.
            _logger.info("Locally cached PDF found at %s", localFile)
            return localFile

        _logger.info("Downloading PDF from remote URL %s", url)
        # Download into a sibling ".part" file and only rename it to the
        # final name once the transfer has completed. Writing straight to
        # the final name would leave a truncated file behind on failure,
        # which the existence check above would later mistake for a valid
        # cached PDF.
        partialFile = localFile + '.part'
        try:
            with urllib.request.urlopen(url) as response, open(partialFile, 'wb') as downloadedFile:
                shutil.copyfileobj(response, downloadedFile)
            os.replace(partialFile, localFile)
        finally:
            # After a successful os.replace the partial file no longer
            # exists; this only fires when the download was interrupted.
            if os.path.exists(partialFile):
                os.remove(partialFile)
        return localFile
    """
    Get the number of pages in a PDF file
    """
@ -98,10 +117,13 @@ class Stitcher:
            self.currentLevel = 3
        elif(tag =='a'):
            file = element.attrib.get('href')
            if(validators.url(file)):
                file = self._cacheURL(file)
            fit = element.attrib.get('fit', self.defaultFit)
            rotate = int(element.attrib.get('rotate', self.defaultRotate))
            start = int(element.attrib.get('start', self.defaultStart))
-            end = int(element.attrib.get('end', self._get_pdf_number_of_pages(file) if self.defaultEnd is None else self.defaultEnd))
+            end = int(element.attrib.get('end', self._get_pdf_number_of_pages(file)
                                         if self.defaultEnd is None else self.defaultEnd))
            filters = (rotate, start, end)
            b = Bookmark(self.currentPage, element.text, self.currentLevel+1, fit)
            self.files.append((file, self.currentPage, filters))
--- a/tests/book-external-url.md
+++ b/tests/book-external-url.md
@ -0,0 +1,17 @@
 existing_bookmarks: remove
 author: Wiki, the Cat
 subject: A book about adventures of Wiki, the cat.
 keywords: wiki,potato,jelly
 # Super Potato Book
 # Volume 1
 [Part 1](1.pdf)
 # Volume 2
 [Part 2](https://unec.edu.az/application/uploads/2014/12/pdf-sample.pdf)
 # Volume 3
 [Part 3](https://juventudedesporto.cplp.org/files/sample-pdf_9359.pdf)