A command line (CLI) program for monitoring and downloading 8chan threads. Licensed under MIT.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

logic.py 8.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. import os
  2. import sqlite3
  3. import webbrowser
  4. import click
  5. import requests
  6. import wget
  7. import constants
  8. import helpers
  9. import humanize
  10. def refresh_all_threads():
  11. print('Getting list of threads...')
  12. db = sqlite3.connect(constants.database_path)
  13. threads = db.execute('SELECT original_url, board, thread_no, last_refreshed FROM threads WHERE is_404 = 0').fetchall()
  14. db.close()
  15. print('Refreshing all threads...')
  16. for t in threads:
  17. url = t[0]
  18. board = t[1]
  19. thread_no = t[2]
  20. last_refreshed = int(t[3])
  21. now = helpers.timestamp_now()
  22. db = sqlite3.connect(constants.database_path)
  23. db.execute('UPDATE threads SET last_refreshed=? WHERE original_url=?',
  24. (now, url))
  25. db.commit()
  26. db.close()
  27. r = requests.get(helpers.generate_thread_json_url(board, thread_no))
  28. if r.status_code == requests.codes.ok:
  29. j = r.json()
  30. db = sqlite3.connect(constants.database_path)
  31. db.execute('UPDATE threads SET last_reply_time=? WHERE original_url=?',
  32. (j['posts'][-1]['last_modified'], url))
  33. db.commit()
  34. db.close()
  35. last_modified = int(j['posts'][-1]['last_modified'])
  36. if last_modified > last_refreshed:
  37. print('{}/{} has new posts!'.format(board, thread_no))
  38. new_replies = [i for i in j['posts'][1:] if int(i['last_modified']) > last_refreshed]
  39. oldest_new_reply = min(new_replies, key=lambda x: int(x['last_modified']))
  40. db = sqlite3.connect(constants.database_path)
  41. db.execute('UPDATE threads SET new_replies=1, oldest_new_reply=? WHERE original_url=?',
  42. (oldest_new_reply['no'], url))
  43. db.commit()
  44. db.close()
  45. dump_thread(r.text, j, board, str(thread_no), str(last_modified))
  46. elif r.status_code == 404:
  47. print('{}/{} has gone 404.'.format(board, thread_no))
  48. db = sqlite3.connect(constants.database_path)
  49. db.execute('UPDATE threads SET last_refreshed = ?, is_404 = 1 WHERE original_url = ?', (now, url))
  50. db.commit()
  51. db.close()
  52. print('Done!')
  53. def list_threads():
  54. db = sqlite3.connect(constants.database_path)
  55. threads = db.execute('SELECT original_url, last_seen, new_replies, is_404, last_refreshed, last_reply_time FROM threads').fetchall()
  56. db.close()
  57. now = helpers.timestamp_now()
  58. format_string = '{}{} {:<50}{:<20}{:<20}{:<20}'
  59. print((format_string).format(' ', ' ', 'Thread URL', 'Last reply', 'Last seen', 'Refreshed'))
  60. print('-----------------------------------------------------------------------------------------------------------------')
  61. for t in threads:
  62. url = t[0]
  63. last_seen = int(t[1])
  64. new_replies = str(t[2])
  65. is_404 = str(t[3])
  66. last_refreshed = t[4]
  67. last_reply_time = t[5]
  68. s = format_string.format(
  69. '!' if new_replies == '1' else ' ',
  70. 'x' if is_404 == '1' else ' ',
  71. url,
  72. humanize.naturaldelta(now - last_reply_time),
  73. humanize.naturaldelta(now - last_seen),
  74. humanize.naturaldelta(now - last_refreshed)
  75. )
  76. print(s)
  77. def open_threads_with_new_replies():
  78. db = sqlite3.connect(constants.database_path)
  79. threads = db.execute('SELECT '
  80. 'original_url, '
  81. 'board, '
  82. 'thread_no, '
  83. 'oldest_new_reply, '
  84. 'new_replies '
  85. 'FROM threads WHERE is_404 = 0').fetchall()
  86. db.close()
  87. for t in threads:
  88. url = t[0]
  89. board = t[1]
  90. thread_no = str(t[2])
  91. oldest_new_reply = t[3]
  92. new_replies = t[4]
  93. now = helpers.timestamp_now()
  94. if new_replies:
  95. thread_url = helpers.generate_thread_html_url(board, thread_no)
  96. if oldest_new_reply:
  97. thread_url += '#q{}'.format(oldest_new_reply)
  98. webbrowser.open(thread_url)
  99. db = sqlite3.connect(constants.database_path)
  100. db.execute('UPDATE threads SET last_seen=?, new_replies=0 WHERE original_url=?', (now, url))
  101. db.commit()
  102. db.close()
  103. def initialize_database():
  104. if not os.path.exists(constants.cache_path): os.mkdir(constants.cache_path)
  105. if os.path.exists(constants.database_path):
  106. print('Table already exists.', end=' ')
  107. if click.confirm('Overwrite? (old file will be backed up)'):
  108. os.rename(constants.database_path, '{}.backup_{}'.format(constants.database_path, helpers.timestamp_now()))
  109. else:
  110. exit()
  111. # Creates database if it doesn't exist (which we have just ensured)
  112. db = sqlite3.connect(constants.database_path)
  113. db.execute('CREATE TABLE threads('
  114. 'original_url TEXT PRIMARY KEY, '
  115. 'board TEXT, '
  116. 'thread_no INTEGER, '
  117. 'anchored_reply INTEGER, '
  118. 'last_refreshed INTEGER, '
  119. 'last_seen INTEGER, '
  120. 'new_replies INTEGER, '
  121. 'oldest_new_reply STRING, '
  122. 'last_reply_time INTEGER,'
  123. 'is_404 INTEGER)')
  124. db.close()
  125. def add_thread_to_db(url):
  126. board, thread_no, anchored_reply = helpers.parse_url(url)
  127. db = sqlite3.connect(constants.database_path)
  128. try:
  129. with db:
  130. now = helpers.timestamp_now()
  131. db.execute('INSERT INTO threads(original_url, board, thread_no, anchored_reply, last_seen, last_refreshed, new_replies, is_404) '
  132. 'VALUES(?,?,?,?,?,?,0,0)', (url, board, thread_no, anchored_reply, now, now))
  133. print('Thread added.')
  134. except sqlite3.IntegrityError:
  135. print('This URL is already recorded. The database will not be altered but the thread will be dumped.')
  136. finally:
  137. db.close()
  138. dump_thread_by_url(url)
  139. def dump_thread_by_url(url):
  140. board, thread_no, null = helpers.parse_url(url)
  141. r = requests.get(helpers.generate_thread_json_url(board, thread_no))
  142. j = r.json()
  143. now = str(helpers.timestamp_now())
  144. dump_thread(r.text, j, board, thread_no, now)
  145. def remove_thread_by_id(board, thread_no):
  146. print('Removing /{}/{} ...'.format(board, thread_no))
  147. db = sqlite3.connect(constants.database_path)
  148. cursor = db.cursor()
  149. urls = cursor.execute('SELECT original_url FROM threads WHERE board = ? AND thread_no = ?', (board, thread_no)).fetchall()
  150. print('Deleting {} entries.'.format(len(urls)))
  151. cursor.execute('DELETE FROM threads WHERE board = ? AND thread_no = ?', (board, thread_no))
  152. db.commit()
  153. db.close()
  154. def dump_thread(raw_json, parsed_json, board, thread_no, last_seen):
  155. """Dumps the JSON of the thread with a timestamp, as well as all the files in it (if they don't already exist).
  156. """
  157. thread_dir = os.path.join(constants.cache_path, board, thread_no)
  158. files_dir = os.path.join(thread_dir, 'files')
  159. if not os.path.exists(files_dir):
  160. os.makedirs(files_dir)
  161. with open(os.path.join(thread_dir, last_seen + '.json'), 'w') as f:
  162. f.write(raw_json)
  163. wget.download(helpers.generate_thread_html_url(board, thread_no), os.path.join(thread_dir, last_seen + '.html'))
  164. print('') # wget fails to start a new line, mangling subsequent text output
  165. for p in parsed_json['posts']:
  166. if 'filename' in p:
  167. dump_file(p['tim'], p['ext'], p['filename'], files_dir)
  168. if 'extra_files' in p:
  169. for i in p['extra_files']:
  170. dump_file(i['tim'], i['ext'], i['filename'], files_dir)
  171. def dump_file(tim, ext, filename, files_dir):
  172. """Downloads a file from 8chan (unless it already exists at the given location).
  173. :param ext: Extension of the file including dot.
  174. :param filename: Original name of the file without extension.
  175. :param files_dir: Directory where the file should go. Will be created if it doesn't exist.
  176. :param tim: tim property from JSON; also the 8ch filename (without the extension).
  177. :return:
  178. """
  179. filename_8ch = tim + ext
  180. filename_original = filename + ext
  181. url = helpers.generate_file_url(filename_8ch)
  182. if not os.path.exists(os.path.join(files_dir, filename_original)):
  183. print('\tDownloading file {} ...'.format(url))
  184. wget.download(url, os.path.join(files_dir, filename_original))
  185. print('') # wget fails to start a new line, mangling subsequent text output