#!/usr/bin/env python2
'''
python/curses epub reader. Requires BeautifulSoup

Keyboard commands:
    Esc/q          - quit
    Tab/Left/Right - toggle between TOC and chapter views

TOC view:
    Up     - up a line
    Down   - down a line
    PgUp   - up a page
    PgDown - down a page

Chapter view:
    Up     - up a page
    Down   - down a page
    PgUp   - up a line
    PgDown - down a line
    i      - open images on page with fabulous (requires PIL)
    e      - edit chapter html in vim
'''
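
# Example invocations (assuming this script is saved as epub.py and that
# 'book.epub' is a placeholder path):
#
#   python2 epub.py book.epub           # interactive curses reader
#   python2 epub.py -d -c 72 book.epub  # dump to text, wrapped at 72 columns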

import curses.wrapper, curses.ascii
import formatter, htmllib, locale, os, StringIO, re, tempfile, zipfile

from BeautifulSoup import BeautifulSoup

# fabulous (with PIL) is optional; without it the 'i' key just reports that
# image rendering is unavailable
try:
    from fabulous import image
    import PIL
except ImportError:
    images = False
else:
    images = True

# curses needs the locale set before it can draw multi-byte (utf-8) text
locale.setlocale(locale.LC_ALL, 'en_US.utf-8')

basedir = ''


def run(screen, program, *args):
    ''' suspend curses and run an external program in the foreground '''
    curses.nocbreak()
    screen.keypad(0)
    curses.echo()
    pid = os.fork()
    if not pid:
        # child: replace this process with the program
        os.execvp(program, (program,) + args)
    os.wait()
    curses.noecho()
    screen.keypad(1)
    curses.cbreak()
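
# e.g. run(screen, 'vim', '/tmp/chapter.html') hands the terminal to vim and
# restores the curses modes once it exits ('/tmp/chapter.html' is only an
# illustrative path)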

def open_image(screen, name, s):
    ''' show images with PIL and fabulous; returns an error string, if any '''
    if not images:
        # the caller displays whatever we return in reverse video
        return 'missing PIL or fabulous'

    ext = os.path.splitext(name)[1]

    screen.erase()
    screen.refresh()
    curses.setsyx(0, 0)
    image_file = tempfile.NamedTemporaryFile(suffix=ext, delete=False)
    image_file.write(s)
    image_file.close()
    try:
        print image.Image(image_file.name)
    except:
        print image_file.name
    finally:
        os.unlink(image_file.name)
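
# fabulous renders the image as ANSI art on stdout; if that fails, the
# temporary file's name is printed instead before the file is removed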

def textify(html_snippet, img_size=(80, 45), maxcol=72):
    ''' text dump of html (img_size is currently unused) '''
    class Parser(htmllib.HTMLParser):
        def anchor_end(self):
            self.anchor = None

        def handle_image(self, source, alt, ismap, align, width, height):
            global basedir
            # images can't be drawn inline, so leave a marker the chapter
            # view can find later
            self.handle_data(
                '[img="{0}{1}" "{2}"]'.format(basedir, source, alt)
            )

    class Formatter(formatter.AbstractFormatter):
        pass

    class Writer(formatter.DumbWriter):
        def __init__(self, fl, maxcol=72):
            formatter.DumbWriter.__init__(self, fl)
            self.maxcol = maxcol

        def send_label_data(self, data):
            self.send_flowing_data(data)
            self.send_flowing_data(' ')

    o = StringIO.StringIO()
    p = Parser(Formatter(Writer(o, maxcol)))
    p.feed(html_snippet)
    p.close()

    return o.getvalue()
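
# A rough sketch of what textify() returns (exact spacing and wrapping are
# up to formatter.DumbWriter):
#
#   textify('<p>Hello <img src="pic.png" alt="cover"/></p>')
#   # -> 'Hello [img="pic.png" "cover"]\n'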

def table_of_contents(fl):
    global basedir

    # find the opf file
    soup = BeautifulSoup(fl.read('META-INF/container.xml'))
    opf = dict(soup.find('rootfile').attrs)['full-path']

    basedir = os.path.dirname(opf)
    if basedir:
        basedir = '{0}/'.format(basedir)

    soup = BeautifulSoup(fl.read(opf))

    # title
    yield (soup.find('dc:title').text, None)

    # the manifest lists all files, but not in reading order
    x, ncx = {}, None
    for item in soup.find('manifest').findAll('item'):
        d = dict(item.attrs)
        x[d['id']] = '{0}{1}'.format(basedir, d['href'])
        if d['media-type'] == 'application/x-dtbncx+xml':
            ncx = '{0}{1}'.format(basedir, d['href'])

    # the spine gives reading order, but not all files
    y = []
    for item in soup.find('spine').findAll('itemref'):
        y.append(x[dict(item.attrs)['idref']])

    z = {}
    if ncx:
        # get chapter titles from the toc
        soup = BeautifulSoup(fl.read(ncx))

        for navpoint in soup('navpoint'):
            k = navpoint.content.get('src', None)
            if k:
                # strip off any anchor text
                k = k.split('#')[0]
                z[k] = navpoint.navlabel.text

    # output
    for section in y:
        if section in z:
            yield (z[section].encode('utf-8'), section.encode('utf-8'))
        else:
            yield (u'', section.encode('utf-8').strip())
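
# table_of_contents() thus yields (title, href) pairs: the book title first
# (with href None), then one pair per spine item in reading order, with an
# empty title for spine items the ncx does not name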

def list_chaps(screen, chaps, start, length):
    ''' draw one page of the table of contents '''
    # 'start' doubles as the running chapter index while drawing
    for i, (title, src) in enumerate(chaps[start:start+length]):
        try:
            if start == 0:
                # the first row is the book title
                screen.addstr(i, 0, '      {0}'.format(title), curses.A_BOLD)
            else:
                screen.addstr(i, 0, '{0:-5} {1}'.format(start, title))
        except:
            # ignore rows that don't fit on the screen
            pass
        start += 1
    screen.refresh()
    return i
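
# the returned index of the last row drawn lets curses_epub() stop the
# cursor at the final entry when the toc is shorter than the screen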

def check_epub(fl):
    ''' the path must exist and end in .epub '''
    if os.path.isfile(fl) and os.path.splitext(fl)[1].lower() == '.epub':
        return True


def dump_epub(fl, maxcol=float("+inf")):
    if not check_epub(fl):
        return
    fl = zipfile.ZipFile(fl, 'r')
    chaps = list(table_of_contents(fl))
    for title, src in chaps:
        print title
        print '-' * len(title)
        if src:
            soup = BeautifulSoup(fl.read(src))
            print textify(
                unicode(soup.find('body')).encode('utf-8'),
                maxcol=maxcol,
            )
        print '\n'
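
# e.g. dump_epub('book.epub', maxcol=72) prints each chapter as wrapped
# plain text under a dashed title line ('book.epub' is a placeholder path)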

def curses_epub(screen, fl):
    if not check_epub(fl):
        return

    #curses.mousemask(curses.BUTTON1_CLICKED)

    fl = zipfile.ZipFile(fl, 'r')
    chaps = list(table_of_contents(fl))
    chaps_pos = [0] * len(chaps)
    start = 0
    cursor_row = 0

    # toc
    while True:
        curses.curs_set(1)
        maxy, maxx = screen.getmaxyx()

        if cursor_row >= maxy:
            cursor_row = maxy - 1

        len_chaps = list_chaps(screen, chaps, start, maxy)
        screen.move(cursor_row, 0)
        ch = screen.getch()

        # quit
        if ch == curses.ascii.ESC:
            return
        try:
            if chr(ch) == 'q':
                return
        except:
            pass

        # up/down line
        if ch in [curses.KEY_DOWN]:
            if start < len(chaps) - maxy:
                start += 1
                screen.clear()
            elif cursor_row < maxy - 1 and cursor_row < len_chaps:
                cursor_row += 1
        elif ch in [curses.KEY_UP]:
            if start > 0:
                start -= 1
                screen.clear()
            elif cursor_row > 0:
                cursor_row -= 1

        # up/down page
        elif ch in [curses.KEY_NPAGE]:
            if start + maxy - 1 < len(chaps):
                start += maxy - 1
                if len_chaps < maxy:
                    start = len(chaps) - maxy
                screen.clear()
        elif ch in [curses.KEY_PPAGE]:
            if start > 0:
                start -= maxy - 1
                if start < 0:
                    start = 0
                screen.clear()

        # to chapter
        elif ch in [curses.ascii.HT, curses.KEY_RIGHT, curses.KEY_LEFT]:
            # the title row has no source file, so there may be no html
            html = None
            if chaps[start + cursor_row][1]:
                html = fl.read(chaps[start + cursor_row][1])
                soup = BeautifulSoup(html)
                chap = textify(
                    unicode(soup.find('body')).encode('utf-8'),
                    img_size=screen.getmaxyx(),
                    maxcol=screen.getmaxyx()[1]
                ).split('\n')
            else:
                chap = []
            screen.clear()
            curses.curs_set(0)
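
            # chaps_pos[] keeps a per-chapter scroll offset, so leaving a
            # chapter and coming back resumes at the same spot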

            # chapter
            while True:
                maxy, maxx = screen.getmaxyx()
                page_images = []
                for i, line in enumerate(chap[
                    chaps_pos[start + cursor_row]:
                    chaps_pos[start + cursor_row] + maxy
                ]):
                    try:
                        screen.addstr(i, 0, line)
                        mch = re.search(r'\[img="([^"]+)" "([^"]*)"\]', line)
                        if mch:
                            page_images.append(mch.group(1))
                    except:
                        pass
                screen.refresh()
                ch = screen.getch()

                # quit
                if ch == curses.ascii.ESC:
                    return
                try:
                    if chr(ch) == 'q':
                        return
                except:
                    pass

                # to TOC
                if ch in [curses.ascii.HT, curses.KEY_RIGHT, curses.KEY_LEFT]:
                    screen.clear()
                    break

                # up/down page
                elif ch in [curses.KEY_DOWN]:
                    if chaps_pos[start + cursor_row] + maxy - 1 < len(chap):
                        chaps_pos[start + cursor_row] += maxy - 1
                        screen.clear()
                elif ch in [curses.KEY_UP]:
                    if chaps_pos[start + cursor_row] > 0:
                        chaps_pos[start + cursor_row] -= maxy - 1
                        if chaps_pos[start + cursor_row] < 0:
                            chaps_pos[start + cursor_row] = 0
                        screen.clear()

                # up/down line
                elif ch in [curses.KEY_NPAGE]:
                    if chaps_pos[start + cursor_row] + maxy - 1 < len(chap):
                        chaps_pos[start + cursor_row] += 1
                        screen.clear()
                elif ch in [curses.KEY_PPAGE]:
                    if chaps_pos[start + cursor_row] > 0:
                        chaps_pos[start + cursor_row] -= 1
                        screen.clear()

                #elif ch in [curses.KEY_MOUSE]:
                #    id, x, y, z, bstate = curses.getmouse()
                #    line = screen.instr(y, 0)
                #    mch = re.search(r'\[img="([^"]+)" "([^"]*)"\]', line)
                #    if mch:
                #        img_fl = mch.group(1)

                else:
                    try:
                        # open images on the current page
                        if chr(ch) == 'i':
                            for img in page_images:
                                err = open_image(screen, img, fl.read(img))
                                if err:
                                    screen.addstr(0, 0, err, curses.A_REVERSE)

                        # edit html with vim (a no-op on rows without a
                        # source file, like the title row)
                        elif chr(ch) == 'e' and html:
                            tmpfl = tempfile.NamedTemporaryFile(delete=False)
                            tmpfl.write(html)
                            tmpfl.close()
                            run(screen, 'vim', tmpfl.name)
                            with open(tmpfl.name) as changed:
                                new_html = changed.read()
                            os.unlink(tmpfl.name)
                            if new_html != html:
                                pass
                                # write to zipfile?

                            # go back to TOC
                            screen.clear()
                            break

                    except (ValueError, IndexError):
                        pass


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    parser.add_argument('-d', '--dump', action='store_true',
                        help='dump EPUB to text')
    parser.add_argument('-c', '--cols', action='store', type=int,
                        default=float("+inf"),
                        help='number of columns to wrap; default is no wrapping')
    parser.add_argument('EPUB', help='view EPUB')
    args = parser.parse_args()

    if args.EPUB:
        if args.dump:
            dump_epub(args.EPUB, args.cols)
        else:
            try:
                curses.wrapper(curses_epub, args.EPUB)
            except KeyboardInterrupt:
                pass