Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

4 роки тому
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. #!/usr/bin/env python2
'''
python/curses epub reader. Requires BeautifulSoup

Keyboard commands:
    Esc/q          - quit
    Tab/Left/Right - toggle between TOC and chapter views
    TOC view:
        Up         - up a line
        Down       - down a line
        PgUp       - up a page
        PgDown     - down a page
    Chapter view:
        Up         - up a page
        Down       - down a page
        PgUp       - up a line
        PgDown     - down a line
        i          - show images on page in the terminal (needs PIL and fabulous)
        e          - open the chapter HTML in vim (edits are not saved back)
'''
  19. import curses.wrapper, curses.ascii
  20. import formatter, htmllib, locale, os, StringIO, re, readline, tempfile, zipfile
  21. import base64, webbrowser
  22. from BeautifulSoup import BeautifulSoup
  23. try:
  24. from fabulous import image
  25. import PIL
  26. except ImportError:
  27. images = False
  28. else:
  29. images = True
  30. locale.setlocale(locale.LC_ALL, 'en_US.utf-8')
  31. basedir = ''
  32. def run(screen, program, *args):
  33. curses.nocbreak()
  34. screen.keypad(0)
  35. curses.echo()
  36. pid = os.fork()
  37. if not pid:
  38. os.execvp(program, (program,) + args)
  39. os.wait()[0]
  40. curses.noecho()
  41. screen.keypad(1)
  42. curses.cbreak()
  43. def open_image(screen, name, s):
  44. ''' show images with PIL and fabulous '''
  45. if not images:
  46. screen.addstr(0, 0, "missing PIL or fabulous", curses.A_REVERSE)
  47. return
  48. ext = os.path.splitext(name)[1]
  49. screen.erase()
  50. screen.refresh()
  51. curses.setsyx(0, 0)
  52. image_file = tempfile.NamedTemporaryFile(suffix=ext, delete=False)
  53. image_file.write(s)
  54. image_file.close()
  55. try:
  56. print image.Image(image_file.name)
  57. except:
  58. print image_file.name
  59. finally:
  60. os.unlink(image_file.name)
  61. def textify(html_snippet, img_size=(80, 45), maxcol=72):
  62. ''' text dump of html '''
  63. class Parser(htmllib.HTMLParser):
  64. def anchor_end(self):
  65. self.anchor = None
  66. def handle_image(self, source, alt, ismap, alight, width, height):
  67. global basedir
  68. self.handle_data(
  69. '[img="{0}{1}" "{2}"]'.format(basedir, source, alt)
  70. )
  71. class Formatter(formatter.AbstractFormatter):
  72. pass
  73. class Writer(formatter.DumbWriter):
  74. def __init__(self, fl, maxcol=72):
  75. formatter.DumbWriter.__init__(self, fl)
  76. self.maxcol = maxcol
  77. def send_label_data(self, data):
  78. self.send_flowing_data(data)
  79. self.send_flowing_data(' ')
  80. o = StringIO.StringIO()
  81. p = Parser(Formatter(Writer(o, maxcol)))
  82. p.feed(html_snippet)
  83. p.close()
  84. return o.getvalue()
  85. def table_of_contents(fl):
  86. global basedir
  87. # find opf file
  88. soup = BeautifulSoup(fl.read('META-INF/container.xml'))
  89. opf = dict(soup.find('rootfile').attrs)['full-path']
  90. basedir = os.path.dirname(opf)
  91. if basedir:
  92. basedir = '{0}/'.format(basedir)
  93. soup = BeautifulSoup(fl.read(opf))
  94. # title
  95. yield (soup.find('dc:title').text, None)
  96. # all files, not in order
  97. x, ncx = {}, None
  98. for item in soup.find('manifest').findAll('item'):
  99. d = dict(item.attrs)
  100. x[d['id']] = '{0}{1}'.format(basedir, d['href'])
  101. if d['media-type'] == 'application/x-dtbncx+xml':
  102. ncx = '{0}{1}'.format(basedir, d['href'])
  103. # reading order, not all files
  104. y = []
  105. for item in soup.find('spine').findAll('itemref'):
  106. y.append(x[dict(item.attrs)['idref']])
  107. z = {}
  108. if ncx:
  109. # get titles from the toc
  110. soup = BeautifulSoup(fl.read(ncx))
  111. for navpoint in soup('navpoint'):
  112. k = navpoint.content.get('src', None)
  113. # strip off any anchor text
  114. k = k.split('#')[0]
  115. if k:
  116. z[k] = navpoint.navlabel.text
  117. # output
  118. for section in y:
  119. if section in z:
  120. yield (z[section].encode('utf-8'), section.encode('utf-8'))
  121. else:
  122. yield (u'', section.encode('utf-8').strip())
  123. def list_chaps(screen, chaps, start, length):
  124. for i, (title, src) in enumerate(chaps[start:start+length]):
  125. try:
  126. if start == 0:
  127. screen.addstr(i, 0, ' {0}'.format(title), curses.A_BOLD)
  128. else:
  129. screen.addstr(i, 0, '{0:-5} {1}'.format(start, title))
  130. except:
  131. pass
  132. start += 1
  133. screen.refresh()
  134. return i
  135. def check_epub(fl):
  136. if os.path.isfile(fl) and os.path.splitext(fl)[1].lower() == '.epub':
  137. return True
  138. def dump_epub(fl, maxcol=float("+inf")):
  139. if not check_epub(fl):
  140. return
  141. fl = zipfile.ZipFile(fl, 'r')
  142. chaps = [i for i in table_of_contents(fl)]
  143. for title, src in chaps:
  144. print title
  145. print '-' * len(title)
  146. if src:
  147. soup = BeautifulSoup(fl.read(src))
  148. print textify(
  149. unicode(soup.find('body')).encode('utf-8'),
  150. maxcol=maxcol,
  151. )
  152. print '\n'
  153. def curses_epub(screen, fl):
  154. if not check_epub(fl):
  155. return
  156. #curses.mousemask(curses.BUTTON1_CLICKED)
  157. fl = zipfile.ZipFile(fl, 'r')
  158. chaps = [i for i in table_of_contents(fl)]
  159. chaps_pos = [0 for i in chaps]
  160. start = 0
  161. cursor_row = 0
  162. # toc
  163. while True:
  164. curses.curs_set(1)
  165. maxy, maxx = screen.getmaxyx()
  166. if cursor_row >= maxy:
  167. cursor_row = maxy - 1
  168. len_chaps = list_chaps(screen, chaps, start, maxy)
  169. screen.move(cursor_row, 0)
  170. ch = screen.getch()
  171. # quit
  172. if ch == curses.ascii.ESC:
  173. return
  174. try:
  175. if chr(ch) == 'q':
  176. return
  177. except:
  178. pass
  179. # up/down line
  180. if ch in [curses.KEY_DOWN]:
  181. if start < len(chaps) - maxy:
  182. start += 1
  183. screen.clear()
  184. elif cursor_row < maxy - 1 and cursor_row < len_chaps:
  185. cursor_row += 1
  186. elif ch in [curses.KEY_UP]:
  187. if start > 0:
  188. start -= 1
  189. screen.clear()
  190. elif cursor_row > 0:
  191. cursor_row -= 1
  192. # up/down page
  193. elif ch in [curses.KEY_NPAGE]:
  194. if start + maxy - 1 < len(chaps):
  195. start += maxy - 1
  196. if len_chaps < maxy:
  197. start = len(chaps) - maxy
  198. screen.clear()
  199. elif ch in [curses.KEY_PPAGE]:
  200. if start > 0:
  201. start -= maxy - 1
  202. if start < 0:
  203. start = 0
  204. screen.clear()
  205. # to chapter
  206. elif ch in [curses.ascii.HT, curses.KEY_RIGHT, curses.KEY_LEFT]:
  207. if chaps[start + cursor_row][1]:
  208. html = fl.read(chaps[start + cursor_row][1])
  209. soup = BeautifulSoup(html)
  210. chap = textify(
  211. unicode(soup.find('body')).encode('utf-8'),
  212. img_size=screen.getmaxyx(),
  213. maxcol=screen.getmaxyx()[1]
  214. ).split('\n')
  215. else:
  216. chap = ''
  217. screen.clear()
  218. curses.curs_set(0)
  219. # chapter
  220. while True:
  221. maxy, maxx = screen.getmaxyx()
  222. images = []
  223. for i, line in enumerate(chap[
  224. chaps_pos[start + cursor_row]:
  225. chaps_pos[start + cursor_row] + maxy
  226. ]):
  227. try:
  228. screen.addstr(i, 0, line)
  229. mch = re.search('\[img="([^"]+)" "([^"]*)"\]', line)
  230. if mch:
  231. images.append(mch.group(1))
  232. except:
  233. pass
  234. screen.refresh()
  235. ch = screen.getch()
  236. # quit
  237. if ch == curses.ascii.ESC:
  238. return
  239. try:
  240. if chr(ch) == 'q':
  241. return
  242. except:
  243. pass
  244. # to TOC
  245. if ch in [curses.ascii.HT, curses.KEY_RIGHT, curses.KEY_LEFT]:
  246. screen.clear()
  247. break
  248. # up/down page
  249. elif ch in [curses.KEY_DOWN]:
  250. if chaps_pos[start + cursor_row] + maxy - 1 < len(chap):
  251. chaps_pos[start + cursor_row] += maxy - 1
  252. screen.clear()
  253. elif ch in [curses.KEY_UP]:
  254. if chaps_pos[start + cursor_row] > 0:
  255. chaps_pos[start + cursor_row] -= maxy - 1
  256. if chaps_pos[start + cursor_row] < 0:
  257. chaps_pos[start + cursor_row] = 0
  258. screen.clear()
  259. # up/down line
  260. elif ch in [curses.KEY_NPAGE]:
  261. if chaps_pos[start + cursor_row] + maxy - 1 < len(chap):
  262. chaps_pos[start + cursor_row] += 1
  263. screen.clear()
  264. elif ch in [curses.KEY_PPAGE]:
  265. if chaps_pos[start + cursor_row] > 0:
  266. chaps_pos[start + cursor_row] -= 1
  267. screen.clear()
  268. #elif ch in [curses.KEY_MOUSE]:
  269. # id, x, y, z, bstate = curses.getmouse()
  270. # line = screen.instr(y, 0)
  271. # mch = re.search('\[img="([^"]+)" "([^"]*)"\]', line)
  272. # if mch:
  273. # img_fl = mch.group(1)
  274. else:
  275. try:
  276. if chr(ch) == 'i':
  277. for img in images:
  278. err = open_image(screen, img, fl.read(img))
  279. if err:
  280. screen.addstr(0, 0, err, curses.A_REVERSE)
  281. # edit html
  282. elif chr(ch) == 'e':
  283. tmpfl = tempfile.NamedTemporaryFile(delete=False)
  284. tmpfl.write(html)
  285. tmpfl.close()
  286. run(screen, 'vim', tmpfl.name)
  287. with open(tmpfl.name) as changed:
  288. new_html = changed.read()
  289. os.unlink(tmpfl.name)
  290. if new_html != html:
  291. pass
  292. # write to zipfile?
  293. # go back to TOC
  294. screen.clear()
  295. break
  296. except (ValueError, IndexError):
  297. pass
  298. if __name__ == '__main__':
  299. import argparse
  300. parser = argparse.ArgumentParser(
  301. formatter_class=argparse.RawDescriptionHelpFormatter,
  302. description=__doc__,
  303. )
  304. parser.add_argument('-d', '--dump', action='store_true',
  305. help='dump EPUB to text')
  306. parser.add_argument('-c', '--cols', action='store', type=int, default=float("+inf"),
  307. help='Number of columns to wrap; default is no wrapping.')
  308. parser.add_argument('EPUB', help='view EPUB')
  309. args = parser.parse_args()
  310. if args.EPUB:
  311. if args.dump:
  312. dump_epub(args.EPUB, args.cols)
  313. else:
  314. try:
  315. curses.wrapper(curses_epub, args.EPUB)
  316. except KeyboardInterrupt:
  317. pass