mirror of https://github.com/captn3m0/Scripts.git
Script to read epubs in console.
This commit is contained in:
parent
ee0314b046
commit
e21f8b5739
|
@ -0,0 +1,369 @@
|
|||
#!/usr/bin/env python
|
||||
'''
|
||||
python/curses epub reader. Requires BeautifulSoup
|
||||
|
||||
Keyboard commands:
|
||||
Esc/q - quit
|
||||
Tab/Left/Right - toggle between TOC and chapter views
|
||||
TOC view:
|
||||
Up - up a line
|
||||
Down - down a line
|
||||
PgUp - up a page
|
||||
PgDown - down a page
|
||||
Chapter view:
|
||||
Up - up a page
|
||||
Down - down a page
|
||||
PgUp - up a line
|
||||
PgDown - down a line
|
||||
i - open images on page in web browser
|
||||
'''
|
||||
|
||||
import curses.wrapper, curses.ascii
|
||||
import formatter, htmllib, locale, os, StringIO, re, readline, tempfile, zipfile
|
||||
import base64, webbrowser
|
||||
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
|
||||
try:
|
||||
from fabulous import image
|
||||
import PIL
|
||||
except ImportError:
|
||||
images = False
|
||||
else:
|
||||
images = True
|
||||
|
||||
locale.setlocale(locale.LC_ALL, 'en_US.utf-8')
|
||||
|
||||
basedir = ''
|
||||
|
||||
def run(screen, program, *args):
    ''' Suspend curses and run an external program until it exits.

    Terminal modes are restored to normal for the child and switched
    back to curses modes afterwards.
    '''
    # Give the child a sane tty: line-buffered input, working keypad, echo.
    curses.nocbreak()
    screen.keypad(0)
    curses.echo()
    pid = os.fork()
    if not pid:
        try:
            os.execvp(program, (program,) + args)
        finally:
            # execvp only returns on failure; make sure the child never
            # falls back into the parent's code path.
            os._exit(127)
    os.wait()[0]
    # Back to curses modes.
    curses.noecho()
    screen.keypad(1)
    curses.cbreak()
|
||||
|
||||
def open_image(screen, name, s):
|
||||
''' show images with PIL and fabulous '''
|
||||
if not images:
|
||||
screen.addstr(0, 0, "missing PIL or fabulous", curses.A_REVERSE)
|
||||
return
|
||||
|
||||
ext = os.path.splitext(name)[1]
|
||||
|
||||
screen.erase()
|
||||
screen.refresh()
|
||||
curses.setsyx(0, 0)
|
||||
image_file = tempfile.NamedTemporaryFile(suffix=ext, delete=False)
|
||||
image_file.write(s)
|
||||
image_file.close()
|
||||
try:
|
||||
print image.Image(image_file.name)
|
||||
except:
|
||||
print image_file.name
|
||||
finally:
|
||||
os.unlink(image_file.name)
|
||||
|
||||
def textify(html_snippet, img_size=(80, 45), maxcol=72):
    ''' Render an HTML snippet to plain text.

    Images become ``[img="path" "alt"]`` placeholders so the curses view
    can locate and open them later.  ``img_size`` is accepted for
    interface compatibility but is not used by this renderer.
    '''
    class _Parser(htmllib.HTMLParser):
        def anchor_end(self):
            # drop link state; we render anchors as plain text
            self.anchor = None

        def handle_image(self, source, alt, ismap, alight, width, height):
            global basedir
            placeholder = '[img="{0}{1}" "{2}"]'.format(basedir, source, alt)
            self.handle_data(placeholder)

    class _Formatter(formatter.AbstractFormatter):
        pass

    class _Writer(formatter.DumbWriter):
        def __init__(self, fl, maxcol=72):
            formatter.DumbWriter.__init__(self, fl)
            self.maxcol = maxcol

        def send_label_data(self, data):
            # list labels flow inline, separated by a space
            self.send_flowing_data(data)
            self.send_flowing_data(' ')

    out = StringIO.StringIO()
    html_parser = _Parser(_Formatter(_Writer(out, maxcol)))
    html_parser.feed(html_snippet)
    html_parser.close()

    return out.getvalue()
|
||||
|
||||
def table_of_contents(fl):
    ''' Yield (title, src) pairs for an open epub ZipFile.

    The first pair is the book title with src None; the remaining pairs
    follow the spine (reading) order.  Titles come from the NCX table of
    contents when available, otherwise an empty string.  Sets the
    module-level ``basedir`` as a side effect.
    '''
    global basedir

    # the container points at the OPF package file
    soup = BeautifulSoup(fl.read('META-INF/container.xml'))
    opf = dict(soup.find('rootfile').attrs)['full-path']

    # every href in the OPF is relative to the OPF's directory
    basedir = os.path.dirname(opf)
    if basedir:
        basedir = '{0}/'.format(basedir)

    soup = BeautifulSoup(fl.read(opf))

    # title
    yield (soup.find('dc:title').text, None)

    # manifest: all files, not in order
    x, ncx = {}, None
    for item in soup.find('manifest').findAll('item'):
        d = dict(item.attrs)
        x[d['id']] = '{0}{1}'.format(basedir, d['href'])
        if d['media-type'] == 'application/x-dtbncx+xml':
            ncx = '{0}{1}'.format(basedir, d['href'])

    # spine: reading order, not all files
    y = []
    for item in soup.find('spine').findAll('itemref'):
        y.append(x[dict(item.attrs)['idref']])

    z = {}
    if ncx:
        # get titles from the toc
        soup = BeautifulSoup(fl.read(ncx))

        for navpoint in soup('navpoint'):
            k = navpoint.content.get('src', None)
            # guard first: a navpoint without src used to crash on
            # None.split(); then strip off any anchor text
            if k:
                k = k.split('#')[0]
            if k:
                z[k] = navpoint.navlabel.text

    # output
    for section in y:
        if section in z:
            yield (z[section].encode('utf-8'), section.encode('utf-8'))
        else:
            yield (u'', section.encode('utf-8').strip())
|
||||
|
||||
def list_chaps(screen, chaps, start, length):
    ''' Paint one page of the table of contents.

    Draws ``chaps[start:start+length]`` from the top of the screen and
    returns the screen row of the last entry drawn, or -1 when nothing
    was drawn (previously this raised NameError on an empty slice).
    '''
    last_row = -1
    for i, (title, src) in enumerate(chaps[start:start+length]):
        try:
            if start == 0:
                # row 0 of the book is the title entry -- emphasize it
                screen.addstr(i, 0, ' {0}'.format(title), curses.A_BOLD)
            else:
                screen.addstr(i, 0, '{0:-5} {1}'.format(start, title))
        except curses.error:
            # best-effort drawing: writes past the screen edge are skipped
            pass
        last_row = i
        start += 1
    screen.refresh()
    return last_row
|
||||
|
||||
def check_epub(fl):
    ''' Return True when *fl* names an existing file with a .epub
    extension (case-insensitive); otherwise fall through to None. '''
    is_file = os.path.isfile(fl)
    has_epub_ext = os.path.splitext(fl)[1].lower() == '.epub'
    if is_file and has_epub_ext:
        return True
|
||||
|
||||
def dump_epub(fl, maxcol=float("+inf")):
|
||||
if not check_epub(fl):
|
||||
return
|
||||
fl = zipfile.ZipFile(fl, 'r')
|
||||
chaps = [i for i in table_of_contents(fl)]
|
||||
for title, src in chaps:
|
||||
print title
|
||||
print '-' * len(title)
|
||||
if src:
|
||||
soup = BeautifulSoup(fl.read(src))
|
||||
print textify(
|
||||
unicode(soup.find('body')).encode('utf-8'),
|
||||
maxcol=maxcol,
|
||||
)
|
||||
print '\n'
|
||||
|
||||
def curses_epub(screen, fl):
    ''' Interactive curses viewer: a TOC screen and a chapter screen.

    Key bindings are listed in the module docstring.  Each chapter's
    scroll position is remembered (``chaps_pos``) for the whole session.
    Returns (quitting) on Esc or 'q' from either screen.
    '''
    if not check_epub(fl):
        return

    #curses.mousemask(curses.BUTTON1_CLICKED)

    fl = zipfile.ZipFile(fl, 'r')
    chaps = [i for i in table_of_contents(fl)]
    chaps_pos = [0 for i in chaps]  # per-chapter scroll offset, in lines
    start = 0        # index of the first TOC entry currently on screen
    cursor_row = 0   # cursor position relative to the top of the screen

    # toc
    while True:
        curses.curs_set(1)
        maxy, maxx = screen.getmaxyx()

        # the terminal may have shrunk since the last draw
        if cursor_row >= maxy:
            cursor_row = maxy - 1

        len_chaps = list_chaps(screen, chaps, start, maxy)
        screen.move(cursor_row, 0)
        ch = screen.getch()

        # quit
        if ch == curses.ascii.ESC:
            return
        try:
            if chr(ch) == 'q':
                return
        except:
            # chr() raises for special keys (arrows etc.) outside 0-255
            pass

        # up/down line
        if ch in [curses.KEY_DOWN]:
            if start < len(chaps) - maxy:
                # scroll the list; the cursor keeps its screen row
                start += 1
                screen.clear()
            elif cursor_row < maxy - 1 and cursor_row < len_chaps:
                cursor_row += 1
        elif ch in [curses.KEY_UP]:
            if start > 0:
                start -= 1
                screen.clear()
            elif cursor_row > 0:
                cursor_row -= 1

        # up/down page
        elif ch in [curses.KEY_NPAGE]:
            if start + maxy - 1 < len(chaps):
                start += maxy - 1
                if len_chaps < maxy:
                    # clamp so the last page stays full
                    start = len(chaps) - maxy
                screen.clear()
        elif ch in [curses.KEY_PPAGE]:
            if start > 0:
                start -= maxy - 1
                if start < 0:
                    start = 0
                screen.clear()

        # to chapter
        elif ch in [curses.ascii.HT, curses.KEY_RIGHT, curses.KEY_LEFT]:
            if chaps[start + cursor_row][1]:
                html = fl.read(chaps[start + cursor_row][1])
                soup = BeautifulSoup(html)
                chap = textify(
                    unicode(soup.find('body')).encode('utf-8'),
                    img_size=screen.getmaxyx(),
                    maxcol=screen.getmaxyx()[1]
                ).split('\n')
            else:
                # the title row (src None) has no document to show
                chap = ''
            screen.clear()
            curses.curs_set(0)

            # chapter
            while True:
                maxy, maxx = screen.getmaxyx()
                # NOTE(review): this local shadows the module-level
                # `images` feature flag set at import time
                images = []
                for i, line in enumerate(chap[
                    chaps_pos[start + cursor_row]:
                    chaps_pos[start + cursor_row] + maxy
                ]):
                    try:
                        screen.addstr(i, 0, line)
                        # collect image placeholders visible on this page
                        mch = re.search('\[img="([^"]+)" "([^"]*)"\]', line)
                        if mch:
                            images.append(mch.group(1))
                    except:
                        # best-effort draw; off-screen writes are ignored
                        pass
                screen.refresh()
                ch = screen.getch()

                # quit
                if ch == curses.ascii.ESC:
                    return
                try:
                    if chr(ch) == 'q':
                        return
                except:
                    pass

                # to TOC
                if ch in [curses.ascii.HT, curses.KEY_RIGHT, curses.KEY_LEFT]:
                    screen.clear()
                    break

                # up/down page
                elif ch in [curses.KEY_DOWN]:
                    if chaps_pos[start + cursor_row] + maxy - 1 < len(chap):
                        chaps_pos[start + cursor_row] += maxy - 1
                        screen.clear()
                elif ch in [curses.KEY_UP]:
                    if chaps_pos[start + cursor_row] > 0:
                        chaps_pos[start + cursor_row] -= maxy - 1
                        if chaps_pos[start + cursor_row] < 0:
                            chaps_pos[start + cursor_row] = 0
                        screen.clear()

                # up/down line
                elif ch in [curses.KEY_NPAGE]:
                    if chaps_pos[start + cursor_row] + maxy - 1 < len(chap):
                        chaps_pos[start + cursor_row] += 1
                        screen.clear()
                elif ch in [curses.KEY_PPAGE]:
                    if chaps_pos[start + cursor_row] > 0:
                        chaps_pos[start + cursor_row] -= 1
                        screen.clear()

                #elif ch in [curses.KEY_MOUSE]:
                #    id, x, y, z, bstate = curses.getmouse()
                #    line = screen.instr(y, 0)
                #    mch = re.search('\[img="([^"]+)" "([^"]*)"\]', line)
                #    if mch:
                #        img_fl = mch.group(1)

                else:
                    try:
                        if chr(ch) == 'i':
                            # open every image visible on this page
                            for img in images:
                                err = open_image(screen, img, fl.read(img))
                                if err:
                                    screen.addstr(0, 0, err, curses.A_REVERSE)

                        # edit html
                        elif chr(ch) == 'e':

                            # NOTE(review): `html` looks unbound here when
                            # the current entry had no src -- confirm
                            tmpfl = tempfile.NamedTemporaryFile(delete=False)
                            tmpfl.write(html)
                            tmpfl.close()
                            run(screen, 'vim', tmpfl.name)
                            with open(tmpfl.name) as changed:
                                new_html = changed.read()
                            os.unlink(tmpfl.name)
                            if new_html != html:
                                pass
                                # write to zipfile?

                            # go back to TOC
                            screen.clear()
                            break

                    except (ValueError, IndexError):
                        pass
|
||||
|
||||
if __name__ == '__main__':
    import argparse

    # Reuse the module docstring verbatim as the --help description.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    cli.add_argument('-d', '--dump', action='store_true',
                     help='dump EPUB to text')
    cli.add_argument('-c', '--cols', action='store', type=int,
                     default=float("+inf"),
                     help='Number of columns to wrap; default is no wrapping.')
    cli.add_argument('EPUB', help='view EPUB')
    opts = cli.parse_args()

    if opts.EPUB:
        if opts.dump:
            dump_epub(opts.EPUB, opts.cols)
        else:
            try:
                curses.wrapper(curses_epub, opts.EPUB)
            except KeyboardInterrupt:
                pass
|
Loading…
Reference in New Issue