A command line (CLI) program for monitoring and downloading 8chan threads. Licensed under MIT.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

db_methods.py 6.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. import os
  2. import shutil
  3. import webbrowser
  4. import humanize
  5. import db_model
  6. import file_io
  7. import time_methods
  8. import web_methods
  9. from constants import conservative_refresh_criterion
  10. from db_model import Thread, db
  11. from file_io import path_to_database, ensure_directory_exists, timestamped_path_to_exported_threads_file
  12. from json_methods import time_of_last_reply, get_oldest_reply
  13. from web_methods import fetch_and_parse_thread_json, dump_thread_html, dump_thread_files, parse_url
  14. def create_new_database():
  15. """Creates a new database. Will fail if database already exists."""
  16. p = path_to_database()
  17. ensure_directory_exists(os.path.dirname(p))
  18. if os.path.exists(p):
  19. print('{} already exists, doing nothing.'.format(os.path.abspath(p)))
  20. else:
  21. print('Creating database at {}...'.format(os.path.abspath(p)), end=' ')
  22. Thread.create_table()
  23. print('Done!')
  24. def backup_database():
  25. """Makes a copy of the current database."""
  26. if os.path.exists(path_to_database()):
  27. old_path = path_to_database()
  28. new_path = '{}.{}'.format(old_path, time_methods.timestamp_now())
  29. print('Backing up database to {}...'.format(new_path), end=' ')
  30. shutil.copy(old_path, new_path)
  31. print('Done!')
  32. else:
  33. print(path_to_database() + " doesn't exist, doing nothing.")
  34. def add_thread_to_db(url):
  35. board, thread_no, anchored_reply = parse_url(url)
  36. now = time_methods.timestamp_now()
  37. json = fetch_and_parse_thread_json(board, thread_no)
  38. if json:
  39. print('Adding thread.')
  40. last_reply_time = time_of_last_reply(json)
  41. db_model.Thread.create(
  42. original_url=url,
  43. board=board,
  44. thread_no=thread_no,
  45. anchored_reply=anchored_reply,
  46. last_refreshed=now,
  47. last_seen=now,
  48. last_reply_time=last_reply_time
  49. )
  50. dump_thread_html(board, thread_no)
  51. dump_thread_files(board, thread_no, json)
  52. else:
  53. print("Thread didn't load, doing nothing.")
  54. def remove_thread_by_id(board, thread_no):
  55. threads_to_remove = Thread.select().where(Thread.board == board and
  56. Thread.thread_no == thread_no)
  57. for t in threads_to_remove:
  58. t.delete_instance()
  59. print('{} entries removed.'.format(len(threads_to_remove)))
  60. def list_threads():
  61. threads = Thread.select().order_by(Thread.last_reply_time)
  62. now = time_methods.timestamp_now()
  63. format_string = '{} {:<50}{:<20}{:<20}{:<20}'
  64. print(format_string.format(' ', 'Thread URL', 'Last reply', 'Last seen', 'Refreshed'))
  65. print('-' * 113)
  66. for t in threads:
  67. if not t.is_up:
  68. mark = 'x'
  69. elif t.replies_to_anchored > 0:
  70. mark = 'Y'
  71. elif t.new_replies > 0:
  72. mark = '!'
  73. else:
  74. mark = ' '
  75. s = format_string.format(
  76. mark,
  77. t.original_url,
  78. humanize.naturaldelta(now - t.last_reply_time),
  79. humanize.naturaldelta(now - t.last_seen),
  80. humanize.naturaldelta(now - t.last_refreshed)
  81. )
  82. print(s)
  83. def refresh_all_threads(conservative=False):
  84. print('Getting list of threads from database...')
  85. threads = Thread.select().where(Thread.is_up)
  86. now = time_methods.timestamp_now()
  87. if conservative:
  88. print('Doing conservative refresh')
  89. # TODO: The more natural form of this expression is (now - Thread.last_refreshed) / (now - Thread.last_reply_time) > 0.01 but for some reason that always gets rounded to zero
  90. threads = threads.where((now - Thread.last_reply_time) / (now - Thread.last_refreshed) < conservative_refresh_criterion)
  91. print('Refreshing {} threads...'.format(len(threads)))
  92. for t in threads:
  93. json = fetch_and_parse_thread_json(t.board, t.thread_no)
  94. if json:
  95. t.last_reply_time = time_of_last_reply(json)
  96. if t.last_reply_time > t.last_refreshed:
  97. replies = json['posts'][1:]
  98. new_replies = [i for i in replies if int(i['last_modified']) > t.last_refreshed]
  99. replies_to_anchored = [i for i in new_replies if ('&gt;&gt;{}'.format(t.anchored_reply) in i['com'])]
  100. print('/{}/{} has {} new posts'.format(t.board, t.thread_no, len(new_replies), len(replies_to_anchored)), end='')
  101. print(" and {} >(You)'s".format(len(replies_to_anchored))) if len(replies_to_anchored) > 0 else print('!')
  102. t.new_replies = len(new_replies)
  103. t.oldest_new_reply = get_oldest_reply(new_replies)
  104. t.replies_to_anchored = len(replies_to_anchored)
  105. dump_thread_html(t.board, t.thread_no)
  106. dump_thread_files(t.board, t.thread_no, json)
  107. else:
  108. print("/{}/{} has 404'd.".format(t.board, t.thread_no))
  109. t.is_up = 0
  110. t.last_refreshed = now
  111. t.save()
  112. print('Done!')
  113. def open_threads_with_new_replies():
  114. threads = Thread.select().where(Thread.is_up and Thread.new_replies)
  115. print('Opening {} threads in browser.'.format(len(threads)))
  116. for t in threads:
  117. thread_url = web_methods.generate_thread_html_url(t.board, t.thread_no)
  118. if t.oldest_new_reply:
  119. thread_url += '#{}'.format(t.oldest_new_reply)
  120. webbrowser.open(thread_url)
  121. t.new_replies = 0
  122. t.replies_to_anchored = 0
  123. t.last_seen = time_methods.timestamp_now()
  124. t.save()
  125. def open_all_threads():
  126. threads = Thread.select()
  127. print('Opening {} threads in browser.'.format(len(threads)))
  128. for t in threads:
  129. thread_url = t.original_url
  130. webbrowser.open(thread_url)
  131. t.new_replies = 0
  132. t.last_seen = time_methods.timestamp_now()
  133. t.save()
  134. def remove_404_threads():
  135. threads = Thread.select().where(Thread.is_up == 0)
  136. print('Removing {} entries.'.format(len(threads)))
  137. for t in threads:
  138. t.delete_instance()
  139. def export_thread_list():
  140. threads = Thread.select()
  141. print('Exporting {} threads.'.format(len(threads)))
  142. with open(timestamped_path_to_exported_threads_file(), 'w') as f:
  143. for t in threads:
  144. f.write('lizard a "{}"\n'.format(t.original_url))