A command line (CLI) program for monitoring and downloading 8chan threads. Licensed under MIT.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

db_methods.py 6.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. import os
  2. import shutil
  3. import webbrowser
  4. import humanize
  5. from lizard import db_model, time_methods, web_methods
  6. from lizard.db_model import Thread
  7. from lizard.file_io import path_to_database, ensure_directory_exists, timestamped_path_to_exported_threads_file
  8. from lizard.json_methods import time_of_last_reply
  9. from lizard.web_methods import parse_url, fetch_and_parse_thread_json, dump_thread_html, dump_thread_files
  # Threshold for the conservative mode of refresh_all_threads: a thread is only refreshed while
  # (now - last_reply_time) / (now - last_refreshed) stays below this value.
  10. conservative_refresh_criterion = 10
  11. def check_database():
  12. """ Checks the database. If a problem is found, prints an error message and terminates program.
  13. Watch out! This does NOT necessarily check every possible problem with the database. """
  14. p = path_to_database()
  15. if not os.path.exists(p):
  16. print('{} does not exist. Try running "lizard c" to create a new one.'.format(p))
  17. exit()
  18. # This only ensures that a table named thread exists, doesn't check if it has the right columns.
  19. if not Thread.table_exists():
  20. print('{} does not have the correct table. Try moving it elsewhere, and running "lizard c" to create a '
  21. 'fresh database.'.format(p))
  22. exit()
  23. def create_new_database():
  24. """Creates a new database. Will fail if database already exists."""
  25. p = path_to_database()
  26. ensure_directory_exists(os.path.dirname(p))
  27. if os.path.exists(p):
  28. print('{} already exists, doing nothing.'.format(p))
  29. else:
  30. print('Creating database at {}...'.format(p), end=' ')
  31. Thread.create_table()
  32. print('Done!')
  33. def backup_database():
  34. """Makes a copy of the current database."""
  35. if os.path.exists(path_to_database()):
  36. old_path = path_to_database()
  37. new_path = '{}.{}'.format(old_path, time_methods.timestamp_now())
  38. print('Backing up database to {}...'.format(new_path), end=' ')
  39. shutil.copy(old_path, new_path)
  40. print('Done!')
  41. else:
  42. print(path_to_database() + " doesn't exist, doing nothing.")
  43. def add_thread_to_db(url):
  44. board, thread_no, anchored_reply = parse_url(url)
  45. now = time_methods.timestamp_now()
  46. json = fetch_and_parse_thread_json(board, thread_no)
  47. if json:
  48. print('Adding thread.')
  49. last_reply_time = time_of_last_reply(json)
  50. db_model.Thread.create(
  51. original_url=url,
  52. board=board,
  53. thread_no=thread_no,
  54. anchored_reply=anchored_reply,
  55. last_refreshed=now,
  56. last_seen=now,
  57. last_reply_time=last_reply_time
  58. )
  59. dump_thread_html(board, thread_no)
  60. dump_thread_files(board, thread_no, json)
  61. else:
  62. print("Thread didn't load, doing nothing.")
  63. def remove_thread_by_id(board, thread_no):
  64. threads_to_remove = Thread.select().where(Thread.board == board and
  65. Thread.thread_no == thread_no)
  66. for t in threads_to_remove:
  67. t.delete_instance()
  68. print('{} entries removed.'.format(len(threads_to_remove)))
  69. def list_threads():
  70. threads = Thread.select().order_by(Thread.last_reply_time)
  71. now = time_methods.timestamp_now()
  72. format_string = '{} {:<50}{:<20}{:<20}{:<20}'
  73. print(format_string.format(' ', 'Thread URL', 'Last reply', 'Last seen', 'Refreshed'))
  74. print('-' * 113)
  75. for t in threads:
  76. if not t.is_up:
  77. mark = 'x'
  78. elif t.replies_to_anchored > 0:
  79. mark = 'Y'
  80. elif t.new_replies > 0:
  81. mark = '!'
  82. else:
  83. mark = ' '
  84. s = format_string.format(
  85. mark,
  86. t.original_url,
  87. humanize.naturaldelta(now - t.last_reply_time),
  88. humanize.naturaldelta(now - t.last_seen),
  89. humanize.naturaldelta(now - t.last_refreshed)
  90. )
  91. print(s)
  92. def refresh_all_threads(conservative=False):
  93. print('Getting list of threads from database...')
  94. threads = Thread.select().where(Thread.is_up)
  95. now = time_methods.timestamp_now()
  96. if conservative:
  97. print('Doing conservative refresh')
  98. # TODO: The more natural form of this expression is (now - Thread.last_refreshed) / (now - Thread.last_reply_time) > 0.01 but for some reason that always gets rounded to zero
  99. threads = threads.where((now - Thread.last_reply_time) / (now - Thread.last_refreshed) < conservative_refresh_criterion)
  100. print('Refreshing {} threads...'.format(len(threads)))
  101. for t in threads:
  102. json = fetch_and_parse_thread_json(t.board, t.thread_no)
  103. if json:
  104. t.last_reply_time = time_of_last_reply(json)
  105. if t.last_reply_time > t.last_refreshed:
  106. replies = json['posts'][1:]
  107. new_replies = [i for i in replies if int(i['last_modified']) > t.last_refreshed]
  108. replies_to_anchored = [i for i in new_replies if ('&gt;&gt;{}'.format(t.anchored_reply) in i['com'])]
  109. print('/{}/{} has {} new posts'.format(t.board, t.thread_no, len(new_replies), len(replies_to_anchored)), end='')
  110. print(" and {} >(You)'s".format(len(replies_to_anchored))) if len(replies_to_anchored) > 0 else print('!')
  111. t.new_replies = len(new_replies)
  112. t.oldest_new_reply = get_oldest_reply(new_replies)
  113. t.replies_to_anchored = len(replies_to_anchored)
  114. dump_thread_html(t.board, t.thread_no)
  115. dump_thread_files(t.board, t.thread_no, json)
  116. else:
  117. print("/{}/{} has 404'd.".format(t.board, t.thread_no))
  118. t.is_up = 0
  119. t.last_refreshed = now
  120. t.save()
  121. print('Done!')
  122. def open_threads_with_new_replies():
  123. threads = Thread.select().where(Thread.is_up and Thread.new_replies)
  124. print('Opening {} threads in browser.'.format(len(threads)))
  125. for t in threads:
  126. thread_url = web_methods.generate_thread_html_url(t.board, t.thread_no)
  127. if t.oldest_new_reply:
  128. thread_url += '#{}'.format(t.oldest_new_reply)
  129. webbrowser.open(thread_url)
  130. t.new_replies = 0
  131. t.replies_to_anchored = 0
  132. t.last_seen = time_methods.timestamp_now()
  133. t.save()
  134. def open_all_threads():
  135. threads = Thread.select()
  136. print('Opening {} threads in browser.'.format(len(threads)))
  137. for t in threads:
  138. thread_url = t.original_url
  139. webbrowser.open(thread_url)
  140. t.new_replies = 0
  141. t.last_seen = time_methods.timestamp_now()
  142. t.save()
  143. def remove_404_threads():
  144. threads = Thread.select().where(Thread.is_up == 0)
  145. print('Removing {} entries.'.format(len(threads)))
  146. for t in threads:
  147. t.delete_instance()
  148. def export_thread_list():
  149. threads = Thread.select()
  150. print('Exporting {} threads.'.format(len(threads)))
  151. with open(timestamped_path_to_exported_threads_file(), 'w') as f:
  152. for t in threads:
  153. f.write('lizard a "{}"\n'.format(t.original_url))