# A command line (CLI) program for monitoring and downloading 8chan threads.
# Licensed under MIT.
import os
import shutil
import webbrowser
import humanize
from lizard import db_model, time_methods, web_methods
from lizard.db_model import Thread
from lizard.file_io import path_to_database, ensure_directory_exists, timestamped_path_to_exported_threads_file
from lizard.json_methods import time_of_last_reply, get_oldest_reply
from lizard.web_methods import parse_url, fetch_and_parse_thread_json, dump_thread_html, dump_thread_files
# Upper bound on the ratio (time since last reply) / (time since last refresh)
# used by refresh_all_threads' conservative mode: only threads below this
# ratio are refreshed.
conservative_refresh_criterion = 10
def check_database():
    """Verify the database file and its thread table exist.

    Prints a hint and terminates the program on the first failed check.
    Watch out! This does NOT necessarily check every possible problem
    with the database.
    """
    db_path = path_to_database()
    if not os.path.exists(db_path):
        print('{} does not exist. Try running "lizard c" to create a new one.'.format(db_path))
        exit()
    # Only confirms a table named "thread" exists; its columns are not validated.
    if not Thread.table_exists():
        print('{} does not have the correct table. Try moving it elsewhere, and running "lizard c" to create a '
              'fresh database.'.format(db_path))
        exit()
def create_new_database():
    """Create a fresh database; refuses to overwrite an existing one."""
    db_path = path_to_database()
    ensure_directory_exists(os.path.dirname(db_path))
    # Guard clause: never clobber an existing database.
    if os.path.exists(db_path):
        print('{} already exists, doing nothing.'.format(db_path))
        return
    print('Creating database at {}...'.format(db_path), end=' ')
    Thread.create_table()
    print('Done!')
def backup_database():
    """Copy the current database file to a timestamped sibling path."""
    source = path_to_database()
    if not os.path.exists(source):
        print(source + " doesn't exist, doing nothing.")
        return
    destination = '{}.{}'.format(source, time_methods.timestamp_now())
    print('Backing up database to {}...'.format(destination), end=' ')
    shutil.copy(source, destination)
    print('Done!')
def add_thread_to_db(url):
    """Fetch a thread's JSON and register it in the database.

    Also dumps the thread's HTML and files to disk. Does nothing when the
    thread fails to load.
    """
    board, thread_no, anchored_reply = parse_url(url)
    now = time_methods.timestamp_now()
    thread_json = fetch_and_parse_thread_json(board, thread_no)
    if not thread_json:
        print("Thread didn't load, doing nothing.")
        return
    print('Adding thread.')
    db_model.Thread.create(
        original_url=url,
        board=board,
        thread_no=thread_no,
        anchored_reply=anchored_reply,
        last_refreshed=now,
        last_seen=now,
        last_reply_time=time_of_last_reply(thread_json),
    )
    # Threads added via the hidden-service URL are also fetched over it.
    use_onion = url.startswith('http://oxwugzccvk3dk6tj.onion')
    dump_thread_html(board, thread_no, use_onion_link=use_onion)
    dump_thread_files(board, thread_no, thread_json, use_onion_link=use_onion)
def remove_thread_by_id(board, thread_no):
    """Delete every stored thread matching both *board* and *thread_no*.

    Bug fix: the conditions were previously combined with Python's `and`,
    which peewee cannot overload — `expr_a and expr_b` evaluates to
    `expr_b` alone (expressions are truthy), so the board condition was
    silently discarded and any thread with a matching number was deleted.
    Peewee expressions must be combined with the bitwise `&` operator,
    with each comparison parenthesized.
    """
    threads_to_remove = Thread.select().where(
        (Thread.board == board) & (Thread.thread_no == thread_no))
    for t in threads_to_remove:
        t.delete_instance()
    print('{} entries removed.'.format(len(threads_to_remove)))
def list_threads(interesting_only=False):
    """Print the watched threads as a table, ordered by last reply time.

    An "interesting" thread is one in which some activity has happened:
    it went 404 ('x'), got replies to the anchored post ('Y'), or got any
    new replies ('!'). With interesting_only=True, unmarked threads are
    omitted from the listing.
    """
    now = time_methods.timestamp_now()
    row_fmt = '{} {:<50}{:<20}{:<20}{:<20}'
    print(row_fmt.format(' ', 'Thread URL', 'Last reply', 'Last seen', 'Refreshed'))
    print('-' * 113)
    for thread in Thread.select().order_by(Thread.last_reply_time):
        # Pick the status symbol; a blank means nothing noteworthy happened.
        if not thread.is_up:
            symbol = 'x'
        elif thread.replies_to_anchored > 0:
            symbol = 'Y'
        elif thread.new_replies > 0:
            symbol = '!'
        else:
            symbol = ' '
        if interesting_only and symbol == ' ':
            continue
        print(row_fmt.format(
            symbol,
            thread.original_url,
            humanize.naturaldelta(now - thread.last_reply_time),
            humanize.naturaldelta(now - thread.last_seen),
            humanize.naturaldelta(now - thread.last_refreshed),
        ))
def refresh_all_threads(conservative=False, use_onion_link=False):
    """Re-fetch every live thread, updating reply counters and dumped files.

    With conservative=True, only threads whose (time since last reply) /
    (time since last refresh) ratio is below conservative_refresh_criterion
    are fetched. Threads that fail to load are marked as down (is_up = 0).

    Bug fixes relative to the original:
    - the "new posts" format string had three placeholders but was passed
      four arguments (the extra one was silently ignored);
    - the follow-up message used a conditional expression purely for its
      print() side effects; rewritten as a plain if/else.
    """
    print('Getting list of threads from database...')
    threads = Thread.select().where(Thread.is_up)
    now = time_methods.timestamp_now()
    if conservative:
        print('Doing conservative refresh')
        # TODO: The more natural form of this expression is
        # (now - Thread.last_refreshed) / (now - Thread.last_reply_time) > 0.01
        # but for some reason that always gets rounded to zero.
        threads = threads.where(
            (now - Thread.last_reply_time) / (now - Thread.last_refreshed) < conservative_refresh_criterion)
    print('Refreshing {} threads...'.format(len(threads)))
    for t in threads:
        thread_json = fetch_and_parse_thread_json(t.board, t.thread_no, use_onion_link=use_onion_link)
        if thread_json:
            t.last_reply_time = time_of_last_reply(thread_json)
            if t.last_reply_time > t.last_refreshed:
                replies = thread_json['posts'][1:]  # skip the OP post
                new_replies = [p for p in replies if int(p['last_modified']) > t.last_refreshed]
                # "&gt;&gt;" is an HTML-escaped ">>" quote-link to the anchored post.
                replies_to_anchored = [p for p in new_replies
                                       if '&gt;&gt;{}'.format(t.anchored_reply) in p['com']]
                print('/{}/{} has {} new posts'.format(t.board, t.thread_no, len(new_replies)), end='')
                if len(replies_to_anchored) > 0:
                    print(" and {} >(You)'s".format(len(replies_to_anchored)))
                else:
                    print('!')
                t.new_replies = len(new_replies)
                t.oldest_new_reply = get_oldest_reply(new_replies)
                t.replies_to_anchored = len(replies_to_anchored)
                dump_thread_html(t.board, t.thread_no, use_onion_link=use_onion_link)
                dump_thread_files(t.board, t.thread_no, thread_json, use_onion_link=use_onion_link)
        else:
            print("/{}/{} has 404'd.".format(t.board, t.thread_no))
            t.is_up = 0
        t.last_refreshed = now
        t.save()
    print('Done!')
def open_threads(only_threads_with_new_replies=True, use_onion_link=False):
    """Open watched threads in the web browser and mark them as seen.

    Resets each opened thread's new-reply counters and updates last_seen.

    Bug fix: the filter previously combined peewee expressions with
    Python's `and`, which evaluates to the right-hand expression alone —
    the `Thread.is_up` condition was silently dropped, so 404'd threads
    with pending replies were still opened. Peewee expressions must be
    combined with the bitwise `&` operator.
    """
    if only_threads_with_new_replies:
        threads = Thread.select().where(Thread.is_up & Thread.new_replies)
    else:
        threads = Thread.select()
    print('Opening {} threads in browser.'.format(len(threads)))
    for t in threads:
        thread_url = web_methods.generate_thread_html_url(t.board, t.thread_no, use_onion_link=use_onion_link)
        if t.oldest_new_reply:
            # Anchor fragment jumps straight to the oldest unseen reply.
            thread_url += '#{}'.format(t.oldest_new_reply)
        webbrowser.open(thread_url)
        t.new_replies = 0
        t.replies_to_anchored = 0
        t.last_seen = time_methods.timestamp_now()
        t.save()
def remove_404_threads():
    """Delete every thread that has been marked as down (is_up == 0)."""
    dead_threads = Thread.select().where(Thread.is_up == 0)
    print('Removing {} entries.'.format(len(dead_threads)))
    for dead in dead_threads:
        dead.delete_instance()
def export_thread_list():
    """Write a script of 'lizard a' commands that recreates the watch list."""
    threads = Thread.select()
    print('Exporting {} threads.'.format(len(threads)))
    commands = ['lizard a "{}"\n'.format(t.original_url) for t in threads]
    with open(timestamped_path_to_exported_threads_file(), 'w') as f:
        f.writelines(commands)