Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.


  1. #!/usr/bin/env python2
  2. '''
  3. Python/curses epub reader. Requires BeautifulSoup.
  4. Keyboard commands:
  5. Esc/q - quit
  6. Tab/Left/Right - toggle between TOC and chapter views
  7. TOC view:
  8. Up - up a line
  9. Down - down a line
  10. PgUp - up a page
  11. PgDown - down a page
  12. Home - first page
  13. End - last page
  14. [0-9] - go to chapter
  15. i - open images on page in web browser
  16. e - open source files with vim
  17. h - show help
  18. Chapter view:
  19. PgUp - up a page
  20. PgDown - down a page
  21. Up - up a line
  22. Down - down a line
  23. Home - first page
  24. End - last page
  25. '''
  26. import curses.wrapper, curses.ascii
  27. import formatter, htmllib, locale, os, StringIO, re, readline, tempfile, zipfile
  28. import mimetypes
  29. from time import time
  30. from math import log10, floor
  31. import base64, webbrowser
  32. from BeautifulSoup import BeautifulSoup
  33. try:
  34. from fabulous import image
  35. import PIL
  36. except ImportError:
  37. images = False
  38. else:
  39. images = True
  40. locale.setlocale(locale.LC_ALL, 'en_US.utf-8')
  41. basedir = ''
  42. parser = None
  43. def run(screen, program, *args):
  44. curses.nocbreak()
  45. screen.keypad(0)
  46. curses.echo()
  47. pid = os.fork()
  48. if not pid:
  49. os.execvp(program, (program,) + args)
  50. os.wait()[0]
  51. curses.noecho()
  52. screen.keypad(1)
  53. curses.cbreak()
  54. def open_image(screen, name, s):
  55. ''' show images with PIL and fabulous '''
  56. if not images:
  57. screen.addstr(0, 0, "missing PIL or fabulous", curses.A_REVERSE)
  58. return
  59. ext = os.path.splitext(name)[1]
  60. screen.erase()
  61. screen.refresh()
  62. curses.setsyx(0, 0)
  63. image_file = tempfile.NamedTemporaryFile(suffix=ext, delete=False)
  64. image_file.write(s)
  65. image_file.close()
  66. try:
  67. print image.Image(image_file.name)
  68. except:
  69. print image_file.name
  70. finally:
  71. os.unlink(image_file.name)
  72. def textify(html_snippet, img_size=(80, 45), maxcol=72, html_file=None):
  73. ''' text dump of html '''
  74. class Parser(htmllib.HTMLParser):
  75. def anchor_end(self):
  76. self.anchor = None
  77. def handle_image(self, source, alt, ismap, alight, width, height):
  78. if os.path.isabs(source):
  79. src = source
  80. else:
  81. src = os.path.normpath(
  82. os.path.join(os.path.dirname(html_file), source)
  83. )
  84. self.handle_data(
  85. '[img="{0}" "{1}"]'.format(src, alt)
  86. )
  87. class Formatter(formatter.AbstractFormatter):
  88. pass
  89. class Writer(formatter.DumbWriter):
  90. def __init__(self, fl, maxcol=72):
  91. formatter.DumbWriter.__init__(self, fl)
  92. self.maxcol = maxcol
  93. def send_label_data(self, data):
  94. self.send_flowing_data(data)
  95. self.send_flowing_data(' ')
  96. o = StringIO.StringIO()
  97. p = Parser(Formatter(Writer(o, maxcol)))
  98. p.feed(html_snippet)
  99. p.close()
  100. return o.getvalue()
  101. def table_of_contents(fl):
  102. global basedir
  103. # find opf file
  104. soup = BeautifulSoup(fl.read('META-INF/container.xml'),
  105. convertEntities=BeautifulSoup.HTML_ENTITIES)
  106. opf = dict(soup.find('rootfile').attrs)['full-path']
  107. basedir = os.path.dirname(opf)
  108. if basedir:
  109. basedir = '{0}/'.format(basedir)
  110. soup = BeautifulSoup(fl.read(opf),
  111. convertEntities=BeautifulSoup.HTML_ENTITIES)
  112. # title
  113. yield (soup.find('dc:title').text, None)
  114. # all files, not in order
  115. x, ncx = {}, None
  116. for item in soup.find('manifest').findAll('item'):
  117. d = dict(item.attrs)
  118. x[d['id']] = '{0}{1}'.format(basedir, d['href'])
  119. if d['media-type'] == 'application/x-dtbncx+xml':
  120. ncx = '{0}{1}'.format(basedir, d['href'])
  121. # reading order, not all files
  122. y = []
  123. for item in soup.find('spine').findAll('itemref'):
  124. y.append(x[dict(item.attrs)['idref']])
  125. z = {}
  126. if ncx:
  127. # get titles from the toc
  128. soup = BeautifulSoup(fl.read(ncx),
  129. convertEntities=BeautifulSoup.HTML_ENTITIES)
  130. for navpoint in soup('navpoint'):
  131. k = navpoint.content.get('src', None)
  132. # strip off any anchor text
  133. k = k.split('#')[0]
  134. if k:
  135. z['{0}{1}'.format(basedir, k)] = navpoint.navlabel.text
  136. # output
  137. for section in y:
  138. if section in z:
  139. yield (z[section].encode('utf-8'), section.encode('utf-8'))
  140. else:
  141. yield (u'', section.encode('utf-8').strip())
  142. def list_chaps(screen, chaps, start, length):
  143. for i, (title, src) in enumerate(chaps[start:start+length]):
  144. try:
  145. if start == 0:
  146. screen.addstr(i, 0, ' {0}'.format(title), curses.A_BOLD)
  147. else:
  148. screen.addstr(i, 0, '{0:-5} {1}'.format(start, title))
  149. except:
  150. pass
  151. start += 1
  152. screen.refresh()
  153. return i
  154. def check_epub(fl):
  155. return os.path.isfile(fl) and \
  156. mimetypes.guess_type(fl)[0] == 'application/epub+zip'
  157. def dump_epub(fl, maxcol=float("+inf")):
  158. if not check_epub(fl):
  159. return
  160. fl = zipfile.ZipFile(fl, 'r')
  161. chaps = [i for i in table_of_contents(fl)]
  162. for title, src in chaps:
  163. print title
  164. print '-' * len(title)
  165. if src:
  166. soup = BeautifulSoup(fl.read(src),
  167. convertEntities=BeautifulSoup.HTML_ENTITIES)
  168. print textify(
  169. unicode(soup.find('body')).encode('utf-8'),
  170. maxcol = maxcol,
  171. html_file = src
  172. )
  173. print '\n'
  174. def curses_epub(screen, fl, info=True, maxcol=float("+inf")):
  175. if not check_epub(fl):
  176. return
  177. fl = zipfile.ZipFile(fl, 'r')
  178. chaps = [i for i in table_of_contents(fl)]
  179. chaps_pos = [0 for i in chaps]
  180. start = 0
  181. cursor_row = 0
  182. n_chaps = len(chaps) - 1
  183. cur_chap = None
  184. cur_text = None
  185. if info:
  186. info_cols = 2
  187. else:
  188. info_cols = 0
  189. maxy, maxx = screen.getmaxyx()
  190. if maxcol is not None and maxcol > 0 and maxcol < maxx:
  191. maxx = maxcol
  192. # toc
  193. while True:
  194. if cur_chap is None:
  195. curses.curs_set(1)
  196. if cursor_row >= maxy:
  197. cursor_row = maxy - 1
  198. len_chaps = list_chaps(screen, chaps, start, maxy)
  199. screen.move(cursor_row, 0)
  200. else:
  201. if cur_text is None:
  202. if chaps[cur_chap][1]:
  203. html = fl.read(chaps[cur_chap][1])
  204. soup = BeautifulSoup(html,
  205. convertEntities=BeautifulSoup.HTML_ENTITIES)
  206. cur_text = textify(
  207. unicode(soup.find('body')).encode('utf-8'),
  208. img_size = (maxy, maxx),
  209. maxcol = maxx,
  210. html_file = chaps[cur_chap][1]
  211. ).split('\n')
  212. else:
  213. cur_text = ''
  214. images = []
  215. # Current status info
  216. # Total number of lines
  217. n_lines = len(cur_text)
  218. if info:
  219. # Title
  220. title = chaps[cur_chap][0]
  221. # Total number of pages
  222. n_pages = n_lines / (maxy - 2) + 1
  223. # Truncate title if too long. Add ellipsis at the end
  224. if len(title) > maxx - 29:
  225. title = title[0:maxx - 30] + u'\u2026'.encode('utf-8')
  226. spaces = ''
  227. else:
  228. spaces = ''.join([' '] * (maxx - len(title) - 30))
  229. screen.clear()
  230. curses.curs_set(0)
  231. for i, line in enumerate(cur_text[chaps_pos[cur_chap]:
  232. chaps_pos[cur_chap] + maxy - info_cols]):
  233. try:
  234. screen.addstr(i, 0, line)
  235. mch = re.search('\[img="([^"]+)" "([^"]*)"\]', line)
  236. if mch:
  237. images.append(mch.group(1))
  238. except:
  239. pass
  240. if info:
  241. # Current status info
  242. # Current (last) line number
  243. cur_line = min([n_lines,chaps_pos[cur_chap]+maxy-info_cols])
  244. # Current page
  245. cur_page = (cur_line - 1) / (maxy - 2) + 1
  246. # Current position (%)
  247. cur_pos = 100 * (float(cur_line) / n_lines)
  248. try:
  249. screen.addstr(maxy - 1, 0,
  250. '%s (%2d/%2d) %s Page %2d/%2d (%5.1f%%)' % (
  251. title,
  252. cur_chap,
  253. n_chaps,
  254. spaces,
  255. cur_page,
  256. n_pages,
  257. cur_pos))
  258. except:
  259. pass
  260. screen.refresh()
  261. ch = screen.getch()
  262. if cur_chap is None:
  263. try:
  264. # Set getch to non-blocking
  265. screen.nodelay(1)
  266. # Get int from input
  267. n = int(chr(ch))
  268. # Maximim number one can compute with the same number of digits
  269. # as the number of chapters
  270. # Ex.: for 80 chapters, max_n = 99
  271. max_n = int(10 ** floor(log10(n_chaps) + 1) - 1)
  272. # Break on non-digit input
  273. while chr(ch).isdigit():
  274. delay = time()
  275. ch = -1
  276. # Wait for next character for 0.35 seconds
  277. while ch == -1 and time() - delay < 0.35:
  278. ch = screen.getch()
  279. # If user has input a digit
  280. if ch != -1 and chr(ch).isdigit():
  281. n = n * 10 + int(chr(ch))
  282. # User requested a non-existent chapter, bail
  283. if n > n_chaps:
  284. break
  285. # When we're on the character limit, or no digit was input
  286. # go to chapter
  287. elif n * 10 > max_n or ch == -1:
  288. cur_chap = n
  289. cur_text = None
  290. # Position cursor in middle of screen
  291. # Adjust start acordingly
  292. start = cur_chap - maxy / 2
  293. if start > n_chaps - maxy + 1:
  294. start = n_chaps - maxy + 1
  295. if start < 0:
  296. start = 0
  297. cursor_row = cur_chap - start
  298. break
  299. except:
  300. pass
  301. finally:
  302. screen.nodelay(0)
  303. # help
  304. try:
  305. if chr(ch) == 'h':
  306. curses.curs_set(0)
  307. screen.clear()
  308. for i, line in enumerate(parser.format_help().split('\n')):
  309. screen.addstr(i, 0, line)
  310. screen.refresh()
  311. screen.getch()
  312. screen.clear()
  313. # quit
  314. if ch == curses.ascii.ESC or chr(ch) == 'q':
  315. return
  316. if chr(ch) == 'i':
  317. for img in images:
  318. err = open_image(screen, img, fl.read(img))
  319. if err:
  320. screen.addstr(0, 0, err, curses.A_REVERSE)
  321. # edit html
  322. elif chr(ch) == 'e':
  323. tmpfl = tempfile.NamedTemporaryFile(delete=False)
  324. tmpfl.write(html)
  325. tmpfl.close()
  326. run(screen, 'vim', tmpfl.name)
  327. with open(tmpfl.name) as changed:
  328. new_html = changed.read()
  329. os.unlink(tmpfl.name)
  330. if new_html != html:
  331. pass
  332. # write to zipfile?
  333. # go back to TOC
  334. screen.clear()
  335. except (ValueError, IndexError):
  336. pass
  337. # up/down line
  338. if ch in [curses.KEY_DOWN]:
  339. if cur_chap is None:
  340. if start < len(chaps) - maxy:
  341. start += 1
  342. screen.clear()
  343. elif cursor_row < maxy - 1 and cursor_row < len_chaps:
  344. cursor_row += 1
  345. else:
  346. if chaps_pos[cur_chap] + maxy - info_cols < \
  347. n_lines + maxy - info_cols - 1:
  348. chaps_pos[cur_chap] += 1
  349. screen.clear()
  350. elif ch in [curses.KEY_UP]:
  351. if cur_chap is None:
  352. if start > 0:
  353. start -= 1
  354. screen.clear()
  355. elif cursor_row > 0:
  356. cursor_row -= 1
  357. else:
  358. if chaps_pos[cur_chap] > 0:
  359. chaps_pos[cur_chap] -= 1
  360. screen.clear()
  361. # up/down page
  362. elif ch in [curses.KEY_NPAGE]:
  363. if cur_chap is None:
  364. if start + maxy - 1 < len(chaps):
  365. start += maxy - 1
  366. if len_chaps < maxy:
  367. start = len(chaps) - maxy
  368. screen.clear()
  369. else:
  370. if chaps_pos[cur_chap] + maxy - info_cols < n_lines:
  371. chaps_pos[cur_chap] += maxy - info_cols
  372. elif cur_chap < n_chaps:
  373. cur_chap += 1
  374. cur_text = None
  375. screen.clear()
  376. elif ch in [curses.KEY_PPAGE]:
  377. if cur_chap is None:
  378. if start > 0:
  379. start -= maxy - 1
  380. if start < 0:
  381. start = 0
  382. screen.clear()
  383. else:
  384. if chaps_pos[cur_chap] > 0:
  385. chaps_pos[cur_chap] -= maxy - info_cols
  386. if chaps_pos[cur_chap] < 0:
  387. chaps_pos[cur_chap] = 0
  388. elif cur_chap > 0:
  389. cur_chap -= 1
  390. cur_text = None
  391. screen.clear()
  392. # Position cursor in first chapter / go to first page
  393. elif ch in [curses.KEY_HOME]:
  394. if cur_chap is None:
  395. start = 0
  396. cursor_row = 0
  397. else:
  398. chaps_pos[cur_chap] = 0
  399. screen.clear()
  400. # Position cursor in last chapter / go to last page
  401. elif ch in [curses.KEY_END]:
  402. if cur_chap is None:
  403. cursor_row = min(n_chaps, maxy)
  404. start = max(0, n_chaps - cursor_row)
  405. else:
  406. chaps_pos[cur_chap] = n_lines - n_lines % (maxy - info_cols)
  407. cur_text = None
  408. screen.clear()
  409. # to chapter
  410. elif ch in [curses.ascii.HT, curses.KEY_RIGHT, curses.KEY_LEFT]:
  411. if cur_chap is None and start + cursor_row != 0:
  412. # Current chapter number
  413. cur_chap = start + cursor_row
  414. cur_text = None
  415. else:
  416. cur_chap = None
  417. cur_text = None
  418. screen.clear()
  419. if __name__ == '__main__':
  420. import argparse
  421. parser = argparse.ArgumentParser(
  422. formatter_class = argparse.RawDescriptionHelpFormatter,
  423. description = __doc__,
  424. )
  425. parser.add_argument('-d', '--dump',
  426. action = 'store_true',
  427. help = 'dump EPUB to text')
  428. parser.add_argument('-c', '--cols',
  429. action = 'store',
  430. type = int,
  431. default = float("+inf"),
  432. help = 'Number of columns to wrap; default is no wrapping.')
  433. parser.add_argument('-I', '--no-info',
  434. action = 'store_true',
  435. default = False,
  436. help = 'Do not display chapter/page info. Defaults to false.')
  437. parser.add_argument('EPUB', help='view EPUB')
  438. args = parser.parse_args()
  439. if args.EPUB:
  440. if args.dump:
  441. dump_epub(args.EPUB, args.cols)
  442. else:
  443. try:
  444. curses.wrapper(curses_epub,args.EPUB,not args.no_info,args.cols)
  445. except KeyboardInterrupt:
  446. pass