From e21f8b5739370db36d2ff4ce216126b9dc6e0653 Mon Sep 17 00:00:00 2001 From: Abhay Rana Date: Wed, 3 Sep 2014 06:04:35 +0530 Subject: [PATCH] Script to read epubs in console. --- epub.py | 369 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 369 insertions(+) create mode 100755 epub.py diff --git a/epub.py b/epub.py new file mode 100755 index 0000000..8633cb1 --- /dev/null +++ b/epub.py @@ -0,0 +1,369 @@ +#!/usr/bin/env python +''' +python/curses epub reader. Requires BeautifulSoup + +Keyboard commands: + Esc/q - quit + Tab/Left/Right - toggle between TOC and chapter views + TOC view: + Up - up a line + Down - down a line + PgUp - up a page + PgDown - down a page + Chapter view: + Up - up a page + Down - down a page + PgUp - up a line + PgDown - down a line + i - open images on page in web browser +''' + +import curses.wrapper, curses.ascii +import formatter, htmllib, locale, os, StringIO, re, readline, tempfile, zipfile +import base64, webbrowser + +from BeautifulSoup import BeautifulSoup + +try: + from fabulous import image + import PIL +except ImportError: + images = False +else: + images = True + +locale.setlocale(locale.LC_ALL, 'en_US.utf-8') + +basedir = '' + +def run(screen, program, *args): + curses.nocbreak() + screen.keypad(0) + curses.echo() + pid = os.fork() + if not pid: + os.execvp(program, (program,) + args) + os.wait()[0] + curses.noecho() + screen.keypad(1) + curses.cbreak() + +def open_image(screen, name, s): + ''' show images with PIL and fabulous ''' + if not images: + screen.addstr(0, 0, "missing PIL or fabulous", curses.A_REVERSE) + return + + ext = os.path.splitext(name)[1] + + screen.erase() + screen.refresh() + curses.setsyx(0, 0) + image_file = tempfile.NamedTemporaryFile(suffix=ext, delete=False) + image_file.write(s) + image_file.close() + try: + print image.Image(image_file.name) + except: + print image_file.name + finally: + os.unlink(image_file.name) + +def textify(html_snippet, img_size=(80, 45), maxcol=72): + ''' text dump of html ''' + class Parser(htmllib.HTMLParser): + def anchor_end(self): + self.anchor = None + def handle_image(self, source, alt, ismap, alight, width, height): + global basedir + self.handle_data( + '[img="{0}{1}" "{2}"]'.format(basedir, source, alt) + ) + + class Formatter(formatter.AbstractFormatter): + pass + + class Writer(formatter.DumbWriter): + def __init__(self, fl, maxcol=72): + formatter.DumbWriter.__init__(self, fl) + self.maxcol = maxcol + def send_label_data(self, data): + self.send_flowing_data(data) + self.send_flowing_data(' ') + + o = StringIO.StringIO() + p = Parser(Formatter(Writer(o, maxcol))) + p.feed(html_snippet) + p.close() + + return o.getvalue() + +def table_of_contents(fl): + global basedir + + # find opf file + soup = BeautifulSoup(fl.read('META-INF/container.xml')) + opf = dict(soup.find('rootfile').attrs)['full-path'] + + basedir = os.path.dirname(opf) + if basedir: + basedir = '{0}/'.format(basedir) + + soup = BeautifulSoup(fl.read(opf)) + + # title + yield (soup.find('dc:title').text, None) + + # all files, not in order + x, ncx = {}, None + for item in soup.find('manifest').findAll('item'): + d = dict(item.attrs) + x[d['id']] = '{0}{1}'.format(basedir, d['href']) + if d['media-type'] == 'application/x-dtbncx+xml': + ncx = '{0}{1}'.format(basedir, d['href']) + + # reading order, not all files + y = [] + for item in soup.find('spine').findAll('itemref'): + y.append(x[dict(item.attrs)['idref']]) + + z = {} + if ncx: + # get titles from the toc + soup = BeautifulSoup(fl.read(ncx)) + + for navpoint in soup('navpoint'): + k = navpoint.content.get('src', None) + # strip off any anchor text + k = k.split('#')[0] + if k: + z[k] = navpoint.navlabel.text + + # output + for section in y: + if section in z: + yield (z[section].encode('utf-8'), section.encode('utf-8')) + else: + yield (u'', section.encode('utf-8').strip()) + +def list_chaps(screen, chaps, start, length): + for i, (title, src) in enumerate(chaps[start:start+length]): + try: + if start == 0: + screen.addstr(i, 0, ' {0}'.format(title), curses.A_BOLD) + else: + screen.addstr(i, 0, '{0:-5} {1}'.format(start, title)) + except: + pass + start += 1 + screen.refresh() + return i + +def check_epub(fl): + if os.path.isfile(fl) and os.path.splitext(fl)[1].lower() == '.epub': + return True + +def dump_epub(fl, maxcol=float("+inf")): + if not check_epub(fl): + return + fl = zipfile.ZipFile(fl, 'r') + chaps = [i for i in table_of_contents(fl)] + for title, src in chaps: + print title + print '-' * len(title) + if src: + soup = BeautifulSoup(fl.read(src)) + print textify( + unicode(soup.find('body')).encode('utf-8'), + maxcol=maxcol, + ) + print '\n' + +def curses_epub(screen, fl): + if not check_epub(fl): + return + + #curses.mousemask(curses.BUTTON1_CLICKED) + + fl = zipfile.ZipFile(fl, 'r') + chaps = [i for i in table_of_contents(fl)] + chaps_pos = [0 for i in chaps] + start = 0 + cursor_row = 0 + + # toc + while True: + curses.curs_set(1) + maxy, maxx = screen.getmaxyx() + + if cursor_row >= maxy: + cursor_row = maxy - 1 + + len_chaps = list_chaps(screen, chaps, start, maxy) + screen.move(cursor_row, 0) + ch = screen.getch() + + # quit + if ch == curses.ascii.ESC: + return + try: + if chr(ch) == 'q': + return + except: + pass + + # up/down line + if ch in [curses.KEY_DOWN]: + if start < len(chaps) - maxy: + start += 1 + screen.clear() + elif cursor_row < maxy - 1 and cursor_row < len_chaps: + cursor_row += 1 + elif ch in [curses.KEY_UP]: + if start > 0: + start -= 1 + screen.clear() + elif cursor_row > 0: + cursor_row -= 1 + + # up/down page + elif ch in [curses.KEY_NPAGE]: + if start + maxy - 1 < len(chaps): + start += maxy - 1 + if len_chaps < maxy: + start = len(chaps) - maxy + screen.clear() + elif ch in [curses.KEY_PPAGE]: + if start > 0: + start -= maxy - 1 + if start < 0: + start = 0 + screen.clear() + + # to chapter + elif ch in [curses.ascii.HT, curses.KEY_RIGHT, curses.KEY_LEFT]: + if chaps[start + cursor_row][1]: + html = fl.read(chaps[start + cursor_row][1]) + soup = BeautifulSoup(html) + chap = textify( + unicode(soup.find('body')).encode('utf-8'), + img_size=screen.getmaxyx(), + maxcol=screen.getmaxyx()[1] + ).split('\n') + else: + chap = '' + screen.clear() + curses.curs_set(0) + + # chapter + while True: + maxy, maxx = screen.getmaxyx() + images = [] + for i, line in enumerate(chap[ + chaps_pos[start + cursor_row]: + chaps_pos[start + cursor_row] + maxy + ]): + try: + screen.addstr(i, 0, line) + mch = re.search('\[img="([^"]+)" "([^"]*)"\]', line) + if mch: + images.append(mch.group(1)) + except: + pass + screen.refresh() + ch = screen.getch() + + # quit + if ch == curses.ascii.ESC: + return + try: + if chr(ch) == 'q': + return + except: + pass + + # to TOC + if ch in [curses.ascii.HT, curses.KEY_RIGHT, curses.KEY_LEFT]: + screen.clear() + break + + # up/down page + elif ch in [curses.KEY_DOWN]: + if chaps_pos[start + cursor_row] + maxy - 1 < len(chap): + chaps_pos[start + cursor_row] += maxy - 1 + screen.clear() + elif ch in [curses.KEY_UP]: + if chaps_pos[start + cursor_row] > 0: + chaps_pos[start + cursor_row] -= maxy - 1 + if chaps_pos[start + cursor_row] < 0: + chaps_pos[start + cursor_row] = 0 + screen.clear() + + # up/down line + elif ch in [curses.KEY_NPAGE]: + if chaps_pos[start + cursor_row] + maxy - 1 < len(chap): + chaps_pos[start + cursor_row] += 1 + screen.clear() + elif ch in [curses.KEY_PPAGE]: + if chaps_pos[start + cursor_row] > 0: + chaps_pos[start + cursor_row] -= 1 + screen.clear() + + #elif ch in [curses.KEY_MOUSE]: + # id, x, y, z, bstate = curses.getmouse() + # line = screen.instr(y, 0) + # mch = re.search('\[img="([^"]+)" "([^"]*)"\]', line) + # if mch: + # img_fl = mch.group(1) + + else: + try: + if chr(ch) == 'i': + for img in images: + err = open_image(screen, img, fl.read(img)) + if err: + screen.addstr(0, 0, err, curses.A_REVERSE) + + # edit html + elif chr(ch) == 'e': + + tmpfl = tempfile.NamedTemporaryFile(delete=False) + tmpfl.write(html) + tmpfl.close() + run(screen, 'vim', tmpfl.name) + with open(tmpfl.name) as changed: + new_html = changed.read() + os.unlink(tmpfl.name) + if new_html != html: + pass + # write to zipfile? + + # go back to TOC + screen.clear() + break + + except (ValueError, IndexError): + pass + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description=__doc__, + ) + parser.add_argument('-d', '--dump', action='store_true', + help='dump EPUB to text') + parser.add_argument('-c', '--cols', action='store', type=int, default=float("+inf"), + help='Number of columns to wrap; default is no wrapping.') + parser.add_argument('EPUB', help='view EPUB') + args = parser.parse_args() + + if args.EPUB: + if args.dump: + dump_epub(args.EPUB, args.cols) + else: + try: + curses.wrapper(curses_epub, args.EPUB) + except KeyboardInterrupt: + pass