#!/usr/bin/env python
'''
python/curses epub reader. Requires BeautifulSoup

Keyboard commands:
    Esc/q          - quit
    Tab/Left/Right - toggle between TOC and chapter views
    TOC view:
        Up         - up a line
        Down       - down a line
        PgUp       - up a page
        PgDown     - down a page
    Chapter view:
        Up         - up a page
        Down       - down a page
        PgUp       - up a line
        PgDown     - down a line
        i          - render images on page in the terminal (PIL + fabulous)
        e          - edit the chapter's HTML in vim (changes are not saved back)
'''

import curses.wrapper
import curses.ascii
import formatter
import htmllib
import locale
import os
import re
import StringIO
import tempfile
import zipfile

from BeautifulSoup import BeautifulSoup

try:
    from fabulous import image
    import PIL
except ImportError:
    images = False
else:
    images = True

# use the environment's locale so curses can draw UTF-8 text
locale.setlocale(locale.LC_ALL, '')

basedir = ''

def run(screen, program, *args):
    ''' suspend curses and run an external program, e.g. an editor '''
    curses.nocbreak()
    screen.keypad(0)
    curses.echo()
    pid = os.fork()
    if not pid:
        os.execvp(program, (program,) + args)
    os.wait()
    curses.noecho()
    screen.keypad(1)
    curses.cbreak()

def open_image(screen, name, s):
    ''' render an image in the terminal with PIL and fabulous;
        returns an error message on failure, None on success '''
    if not images:
        return "missing PIL or fabulous"

    ext = os.path.splitext(name)[1]

    screen.erase()
    screen.refresh()
    curses.setsyx(0, 0)
    image_file = tempfile.NamedTemporaryFile(suffix=ext, delete=False)
    image_file.write(s)
    image_file.close()
    try:
        print image.Image(image_file.name)
    except Exception:
        # fabulous could not render it; at least show where it was
        print image_file.name
    finally:
        os.unlink(image_file.name)

def textify(html_snippet, img_size=(80, 45), maxcol=72):
    ''' text dump of html (img_size is reserved for image scaling
        and currently unused) '''
    class Parser(htmllib.HTMLParser):
        def anchor_end(self):
            self.anchor = None

        def handle_image(self, source, alt, ismap, align, width, height):
            global basedir
            self.handle_data(
                '[img="{0}{1}" "{2}"]'.format(basedir, source, alt)
            )

    class Formatter(formatter.AbstractFormatter):
        pass

    class Writer(formatter.DumbWriter):
        def __init__(self, fl, maxcol=72):
            formatter.DumbWriter.__init__(self, fl)
            self.maxcol = maxcol

        def send_label_data(self, data):
            self.send_flowing_data(data)
            self.send_flowing_data(' ')

    o = StringIO.StringIO()
    p = Parser(Formatter(Writer(o, maxcol)))
    p.feed(html_snippet)
    p.close()

    return o.getvalue()

def table_of_contents(fl):
    global basedir

    # find the OPF file
    soup = BeautifulSoup(fl.read('META-INF/container.xml'))
    opf = dict(soup.find('rootfile').attrs)['full-path']

    basedir = os.path.dirname(opf)
    if basedir:
        basedir = '{0}/'.format(basedir)

    soup = BeautifulSoup(fl.read(opf))

    # title
    yield (soup.find('dc:title').text, None)

    # all files, not in order
    x, ncx = {}, None
    for item in soup.find('manifest').findAll('item'):
        d = dict(item.attrs)
        x[d['id']] = '{0}{1}'.format(basedir, d['href'])
        if d['media-type'] == 'application/x-dtbncx+xml':
            ncx = '{0}{1}'.format(basedir, d['href'])

    # reading order, not all files
    y = []
    for item in soup.find('spine').findAll('itemref'):
        y.append(x[dict(item.attrs)['idref']])

    z = {}
    if ncx:
        # get titles from the toc
        soup = BeautifulSoup(fl.read(ncx))

        for navpoint in soup('navpoint'):
            k = navpoint.content.get('src', None)
            if k:
                # strip off any anchor text
                k = k.split('#')[0]
                z[k] = navpoint.navlabel.text

    # output
    for section in y:
        if section in z:
            yield (z[section].encode('utf-8'), section.encode('utf-8'))
        else:
            yield (u'', section.encode('utf-8').strip())
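# A rough sketch of what table_of_contents yields, assuming a typical
# EPUB whose content lives under OEBPS/ (file and chapter names here
# are hypothetical):
#
#   fl = zipfile.ZipFile('moby-dick.epub', 'r')
#   for title, src in table_of_contents(fl):
#       print repr((title, src))
#   # ('Moby Dick', None)               <- dc:title entry, no source file
#   # ('Chapter 1', 'OEBPS/ch01.html')  <- spine entries in reading order
#   # ('', 'OEBPS/ch02.html')           <- spine item with no NCX title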
def list_chaps(screen, chaps, start, length):
    for i, (title, src) in enumerate(chaps[start:start + length]):
        try:
            if start == 0:
                screen.addstr(i, 0, '      {0}'.format(title), curses.A_BOLD)
            else:
                screen.addstr(i, 0, '{0:-5} {1}'.format(start, title))
        except curses.error:
            pass
        start += 1
    screen.refresh()
    return i

def check_epub(fl):
    return os.path.isfile(fl) and os.path.splitext(fl)[1].lower() == '.epub'

def dump_epub(fl, maxcol=float("+inf")):
    if not check_epub(fl):
        return
    fl = zipfile.ZipFile(fl, 'r')
    chaps = list(table_of_contents(fl))
    for title, src in chaps:
        print title
        print '-' * len(title)
        if src:
            soup = BeautifulSoup(fl.read(src))
            print textify(
                unicode(soup.find('body')).encode('utf-8'),
                maxcol=maxcol,
            )
        print '\n'

def curses_epub(screen, fl):
    if not check_epub(fl):
        return

    #curses.mousemask(curses.BUTTON1_CLICKED)

    fl = zipfile.ZipFile(fl, 'r')
    chaps = list(table_of_contents(fl))
    chaps_pos = [0 for i in chaps]
    start = 0
    cursor_row = 0

    # toc
    while True:
        curses.curs_set(1)
        maxy, maxx = screen.getmaxyx()

        if cursor_row >= maxy:
            cursor_row = maxy - 1

        len_chaps = list_chaps(screen, chaps, start, maxy)
        screen.move(cursor_row, 0)
        ch = screen.getch()

        # quit
        if ch == curses.ascii.ESC:
            return
        try:
            if chr(ch) == 'q':
                return
        except ValueError:
            pass

        # up/down line
        if ch in [curses.KEY_DOWN]:
            if start < len(chaps) - maxy:
                start += 1
                screen.clear()
            elif cursor_row < maxy - 1 and cursor_row < len_chaps:
                cursor_row += 1
        elif ch in [curses.KEY_UP]:
            if start > 0:
                start -= 1
                screen.clear()
            elif cursor_row > 0:
                cursor_row -= 1

        # up/down page
        elif ch in [curses.KEY_NPAGE]:
            if start + maxy - 1 < len(chaps):
                start += maxy - 1
                if len_chaps < maxy:
                    start = len(chaps) - maxy
                screen.clear()
        elif ch in [curses.KEY_PPAGE]:
            if start > 0:
                start -= maxy - 1
                if start < 0:
                    start = 0
                screen.clear()

        # to chapter
        elif ch in [curses.ascii.HT, curses.KEY_RIGHT, curses.KEY_LEFT]:
            if chaps[start + cursor_row][1]:
                html = fl.read(chaps[start + cursor_row][1])
                soup = BeautifulSoup(html)
                chap = textify(
                    unicode(soup.find('body')).encode('utf-8'),
                    img_size=screen.getmaxyx(),
                    maxcol=screen.getmaxyx()[1]
                ).split('\n')
            else:
                chap = ''
            screen.clear()
            curses.curs_set(0)

            # chapter
            while True:
                maxy, maxx = screen.getmaxyx()
                images = []
                for i, line in enumerate(chap[
                    chaps_pos[start + cursor_row]:
                    chaps_pos[start + cursor_row] + maxy
                ]):
                    try:
                        screen.addstr(i, 0, line)
                        mch = re.search(r'\[img="([^"]+)" "([^"]*)"\]', line)
                        if mch:
                            images.append(mch.group(1))
                    except curses.error:
                        pass
                screen.refresh()
                ch = screen.getch()

                # quit
                if ch == curses.ascii.ESC:
                    return
                try:
                    if chr(ch) == 'q':
                        return
                except ValueError:
                    pass

                # back to TOC
                if ch in [curses.ascii.HT, curses.KEY_RIGHT, curses.KEY_LEFT]:
                    screen.clear()
                    break

                # up/down page
                elif ch in [curses.KEY_DOWN]:
                    if chaps_pos[start + cursor_row] + maxy - 1 < len(chap):
                        chaps_pos[start + cursor_row] += maxy - 1
                        screen.clear()
                elif ch in [curses.KEY_UP]:
                    if chaps_pos[start + cursor_row] > 0:
                        chaps_pos[start + cursor_row] -= maxy - 1
                        if chaps_pos[start + cursor_row] < 0:
                            chaps_pos[start + cursor_row] = 0
                        screen.clear()

                # up/down line
                elif ch in [curses.KEY_NPAGE]:
                    if chaps_pos[start + cursor_row] + maxy - 1 < len(chap):
                        chaps_pos[start + cursor_row] += 1
                        screen.clear()
                elif ch in [curses.KEY_PPAGE]:
                    if chaps_pos[start + cursor_row] > 0:
                        chaps_pos[start + cursor_row] -= 1
                        screen.clear()

                #elif ch in [curses.KEY_MOUSE]:
                #    id, x, y, z, bstate = curses.getmouse()
                #    line = screen.instr(y, 0)
                #    mch = re.search(r'\[img="([^"]+)" "([^"]*)"\]', line)
                #    if mch:
                #        img_fl = mch.group(1)

                else:
                    try:
                        # open images on the current page
                        if chr(ch) == 'i':
                            for img in images:
                                err = open_image(screen, img, fl.read(img))
                                if err:
                                    screen.addstr(0, 0, err, curses.A_REVERSE)

                        # edit the chapter's html in vim
                        elif chr(ch) == 'e':
                            tmpfl = tempfile.NamedTemporaryFile(delete=False)
                            tmpfl.write(html)
                            tmpfl.close()
                            run(screen, 'vim', tmpfl.name)
                            with open(tmpfl.name) as changed:
                                new_html = changed.read()
                            os.unlink(tmpfl.name)
                            if new_html != html:
                                pass
                                # write to zipfile?
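                                # A minimal sketch of one way to do that,
                                # left as a comment: zipfile cannot replace
                                # a member in place, so every entry would be
                                # copied into a fresh archive with the edited
                                # chapter substituted. 'path' is hypothetical
                                # (fl was rebound to the open ZipFile, so the
                                # original .epub path would need to be kept):
                                #
                                #   src = chaps[start + cursor_row][1]
                                #   out = zipfile.ZipFile(path + '.new', 'w')
                                #   for info in fl.infolist():
                                #       if info.filename == src:
                                #           out.writestr(info, new_html)
                                #       else:
                                #           out.writestr(info, fl.read(info.filename))
                                #   out.close()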
                            # go back to TOC
                            screen.clear()
                            break

                    except (ValueError, IndexError):
                        pass

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    parser.add_argument('-d', '--dump', action='store_true',
                        help='dump EPUB to text')
    parser.add_argument('-c', '--cols', action='store', type=int,
                        default=float("+inf"),
                        help='number of columns to wrap; default is no wrapping')
    parser.add_argument('EPUB', help='view EPUB')
    args = parser.parse_args()

    if args.EPUB:
        if args.dump:
            dump_epub(args.EPUB, args.cols)
        else:
            try:
                curses.wrapper(curses_epub, args.EPUB)
            except KeyboardInterrupt:
                pass
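# Example invocations, assuming this file is saved as epub.py and a
# hypothetical book file (the -d/-c flags match the argparse setup above):
#
#   python epub.py moby-dick.epub            # interactive curses reader
#   python epub.py -d moby-dick.epub         # dump the whole book as text
#   python epub.py -d -c 72 moby-dick.epub   # dump, wrapped at 72 columns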