#!/usr/bin/env python2
'''
python/curses epub reader. Requires BeautifulSoup (3.x)

Keyboard commands:
    Esc/q          - quit
    Tab/Left/Right - toggle between TOC and chapter views
    TOC view:
        Up         - up a line
        Down       - down a line
        PgUp       - up a page
        PgDown     - down a page
    Chapter view:
        Up         - up a page
        Down       - down a page
        PgUp       - up a line
        PgDown     - down a line
        i          - open images on the current page (requires fabulous and PIL)
        e          - open the chapter HTML in vim (edits are not written back)
'''
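
# Example invocations (assuming this file is saved as epub.py; book.epub is a
# placeholder for any EPUB file):
#
#   python2 epub.py book.epub            # read book.epub in the curses UI
#   python2 epub.py -d book.epub         # dump the whole book as plain text
#   python2 epub.py -d -c 72 book.epub   # dump, wrapping lines at 72 columns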

import curses.wrapper, curses.ascii
import formatter, htmllib, locale, os, StringIO, re, readline, tempfile, zipfile
import base64, webbrowser

from BeautifulSoup import BeautifulSoup

try:
    from fabulous import image
    import PIL
except ImportError:
    images = False
else:
    images = True

locale.setlocale(locale.LC_ALL, 'en_US.utf-8')

basedir = ''

def run(screen, program, *args):
    ''' suspend curses, run an external program (e.g. vim), then restore '''
    curses.nocbreak()
    screen.keypad(0)
    curses.echo()
    pid = os.fork()
    if not pid:
        os.execvp(program, (program,) + args)
    os.wait()[0]
    curses.noecho()
    screen.keypad(1)
    curses.cbreak()

def open_image(screen, name, s):
    ''' show images with PIL and fabulous '''
    if not images:
        screen.addstr(0, 0, "missing PIL or fabulous", curses.A_REVERSE)
        return

    ext = os.path.splitext(name)[1]

    screen.erase()
    screen.refresh()
    curses.setsyx(0, 0)
    image_file = tempfile.NamedTemporaryFile(suffix=ext, delete=False)
    image_file.write(s)
    image_file.close()
    try:
        print image.Image(image_file.name)
    except:
        print image_file.name
    finally:
        os.unlink(image_file.name)

def textify(html_snippet, img_size=(80, 45), maxcol=72):
    ''' text dump of html '''
    class Parser(htmllib.HTMLParser):
        def anchor_end(self):
            self.anchor = None
        def handle_image(self, source, alt, ismap, align, width, height):
            global basedir
            self.handle_data(
                '[img="{0}{1}" "{2}"]'.format(basedir, source, alt)
            )

    class Formatter(formatter.AbstractFormatter):
        pass

    class Writer(formatter.DumbWriter):
        def __init__(self, fl, maxcol=72):
            formatter.DumbWriter.__init__(self, fl)
            self.maxcol = maxcol
        def send_label_data(self, data):
            self.send_flowing_data(data)
            self.send_flowing_data(' ')

    o = StringIO.StringIO()
    p = Parser(Formatter(Writer(o, maxcol)))
    p.feed(html_snippet)
    p.close()

    return o.getvalue()
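
# textify() renders each <img> as an inline placeholder of the form
# [img="<path>" "<alt text>"]; the curses chapter view searches lines for
# this pattern so that 'i' can open the images visible on the current page.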

def table_of_contents(fl):
    ''' yield (title, src) pairs: the book title first (src is None), then
        each spine item in reading order '''
    global basedir

    # find the OPF package file via META-INF/container.xml
    soup = BeautifulSoup(fl.read('META-INF/container.xml'))
    opf = dict(soup.find('rootfile').attrs)['full-path']

    basedir = os.path.dirname(opf)
    if basedir:
        basedir = '{0}/'.format(basedir)

    soup = BeautifulSoup(fl.read(opf))

    # title
    yield (soup.find('dc:title').text, None)

    # all files, not in order
    x, ncx = {}, None
    for item in soup.find('manifest').findAll('item'):
        d = dict(item.attrs)
        x[d['id']] = '{0}{1}'.format(basedir, d['href'])
        if d['media-type'] == 'application/x-dtbncx+xml':
            ncx = '{0}{1}'.format(basedir, d['href'])

    # reading order, not all files
    y = []
    for item in soup.find('spine').findAll('itemref'):
        y.append(x[dict(item.attrs)['idref']])

    z = {}
    if ncx:
        # get titles from the toc
        soup = BeautifulSoup(fl.read(ncx))

        for navpoint in soup('navpoint'):
            k = navpoint.content.get('src', None)
            if k:
                # strip off any anchor text
                k = k.split('#')[0]
                z[k] = navpoint.navlabel.text

    # output
    for section in y:
        if section in z:
            yield (z[section].encode('utf-8'), section.encode('utf-8'))
        else:
            yield (u'', section.encode('utf-8').strip())

def list_chaps(screen, chaps, start, length):
    ''' draw one page of the table of contents, return the last row index '''
    for i, (title, src) in enumerate(chaps[start:start + length]):
        try:
            if start == 0:
                screen.addstr(i, 0, ' {0}'.format(title), curses.A_BOLD)
            else:
                screen.addstr(i, 0, '{0:-5} {1}'.format(start, title))
        except:
            pass
        start += 1
    screen.refresh()
    return i

def check_epub(fl):
    if os.path.isfile(fl) and os.path.splitext(fl)[1].lower() == '.epub':
        return True

def dump_epub(fl, maxcol=float("+inf")):
    if not check_epub(fl):
        return
    fl = zipfile.ZipFile(fl, 'r')
    chaps = [i for i in table_of_contents(fl)]
    for title, src in chaps:
        print title
        print '-' * len(title)
        if src:
            soup = BeautifulSoup(fl.read(src))
            print textify(
                unicode(soup.find('body')).encode('utf-8'),
                maxcol=maxcol,
            )
        print '\n'

def curses_epub(screen, fl):
    if not check_epub(fl):
        return

    #curses.mousemask(curses.BUTTON1_CLICKED)

    fl = zipfile.ZipFile(fl, 'r')
    chaps = [i for i in table_of_contents(fl)]
    chaps_pos = [0 for i in chaps]
    start = 0
    cursor_row = 0

    # toc
    while True:
        curses.curs_set(1)
        maxy, maxx = screen.getmaxyx()

        if cursor_row >= maxy:
            cursor_row = maxy - 1

        len_chaps = list_chaps(screen, chaps, start, maxy)
        screen.move(cursor_row, 0)
        ch = screen.getch()

        # quit
        if ch == curses.ascii.ESC:
            return
        try:
            if chr(ch) == 'q':
                return
        except:
            pass

        # up/down line
        if ch in [curses.KEY_DOWN]:
            if start < len(chaps) - maxy:
                start += 1
                screen.clear()
            elif cursor_row < maxy - 1 and cursor_row < len_chaps:
                cursor_row += 1
        elif ch in [curses.KEY_UP]:
            if start > 0:
                start -= 1
                screen.clear()
            elif cursor_row > 0:
                cursor_row -= 1

        # up/down page
        elif ch in [curses.KEY_NPAGE]:
            if start + maxy - 1 < len(chaps):
                start += maxy - 1
                if len_chaps < maxy:
                    start = len(chaps) - maxy
                screen.clear()
        elif ch in [curses.KEY_PPAGE]:
            if start > 0:
                start -= maxy - 1
                if start < 0:
                    start = 0
                screen.clear()

        # to chapter
        elif ch in [curses.ascii.HT, curses.KEY_RIGHT, curses.KEY_LEFT]:
            if chaps[start + cursor_row][1]:
                html = fl.read(chaps[start + cursor_row][1])
                soup = BeautifulSoup(html)
                chap = textify(
                    unicode(soup.find('body')).encode('utf-8'),
                    img_size=screen.getmaxyx(),
                    maxcol=screen.getmaxyx()[1]
                ).split('\n')
            else:
                # nothing to render for entries without a source file
                html = ''
                chap = ''
            screen.clear()
            curses.curs_set(0)
            # chapter
            while True:
                maxy, maxx = screen.getmaxyx()
                # image paths found on the currently displayed page (note
                # that this shadows the module-level `images` flag)
                images = []
                for i, line in enumerate(chap[
                    chaps_pos[start + cursor_row]:
                    chaps_pos[start + cursor_row] + maxy
                ]):
                    try:
                        screen.addstr(i, 0, line)
                        mch = re.search(r'\[img="([^"]+)" "([^"]*)"\]', line)
                        if mch:
                            images.append(mch.group(1))
                    except:
                        pass
                screen.refresh()
                ch = screen.getch()

                # quit
                if ch == curses.ascii.ESC:
                    return
                try:
                    if chr(ch) == 'q':
                        return
                except:
                    pass

                # to TOC
                if ch in [curses.ascii.HT, curses.KEY_RIGHT, curses.KEY_LEFT]:
                    screen.clear()
                    break

                # up/down page
                elif ch in [curses.KEY_DOWN]:
                    if chaps_pos[start + cursor_row] + maxy - 1 < len(chap):
                        chaps_pos[start + cursor_row] += maxy - 1
                        screen.clear()
                elif ch in [curses.KEY_UP]:
                    if chaps_pos[start + cursor_row] > 0:
                        chaps_pos[start + cursor_row] -= maxy - 1
                        if chaps_pos[start + cursor_row] < 0:
                            chaps_pos[start + cursor_row] = 0
                        screen.clear()

                # up/down line
                elif ch in [curses.KEY_NPAGE]:
                    if chaps_pos[start + cursor_row] + maxy - 1 < len(chap):
                        chaps_pos[start + cursor_row] += 1
                        screen.clear()
                elif ch in [curses.KEY_PPAGE]:
                    if chaps_pos[start + cursor_row] > 0:
                        chaps_pos[start + cursor_row] -= 1
                        screen.clear()

                #elif ch in [curses.KEY_MOUSE]:
                #    id, x, y, z, bstate = curses.getmouse()
                #    line = screen.instr(y, 0)
                #    mch = re.search(r'\[img="([^"]+)" "([^"]*)"\]', line)
                #    if mch:
                #        img_fl = mch.group(1)

                else:
                    try:
                        if chr(ch) == 'i':
                            for img in images:
                                err = open_image(screen, img, fl.read(img))
                                if err:
                                    screen.addstr(0, 0, err, curses.A_REVERSE)

                        # edit html
                        elif chr(ch) == 'e':

                            tmpfl = tempfile.NamedTemporaryFile(delete=False)
                            tmpfl.write(html)
                            tmpfl.close()
                            run(screen, 'vim', tmpfl.name)
                            with open(tmpfl.name) as changed:
                                new_html = changed.read()
                            os.unlink(tmpfl.name)
                            if new_html != html:
                                pass
                                # write to zipfile?

                            # go back to TOC
                            screen.clear()
                            break

                    except (ValueError, IndexError):
                        pass

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    parser.add_argument('-d', '--dump', action='store_true',
                        help='dump EPUB to text')
    parser.add_argument('-c', '--cols', action='store', type=int,
                        default=float("+inf"),
                        help='Number of columns to wrap; default is no wrapping.')
    parser.add_argument('EPUB', help='view EPUB')
    args = parser.parse_args()

    if args.EPUB:
        if args.dump:
            dump_epub(args.EPUB, args.cols)
        else:
            try:
                curses.wrapper(curses_epub, args.EPUB)
            except KeyboardInterrupt:
                pass