|
|
@@ -0,0 +1,240 @@ |
|
|
|
import os |
|
|
|
import sqlite3 |
|
|
|
import webbrowser |
|
|
|
import click |
|
|
|
import requests |
|
|
|
import wget |
|
|
|
import constants |
|
|
|
import helpers |
|
|
|
import humanize |
|
|
|
|
|
|
|
|
|
|
|
def refresh_all_threads():
    """Poll every tracked thread that is not marked 404 and record what changed.

    For each row of the ``threads`` table with ``is_404 = 0`` this stamps
    ``last_refreshed`` with the current time and requests the thread's JSON.

    * On an OK response ``last_reply_time`` is stored.  If the last post was
      modified after the previous refresh, the row is flagged with
      ``new_replies = 1``, the post number of the oldest new reply is stored
      (NULL when no reply after the first post is new), and the thread is
      dumped via ``dump_thread``.
    * On a 404 response the row is marked ``is_404 = 1``.
    * Any other status code leaves the row untouched apart from the
      ``last_refreshed`` stamp.
    """
    print('Getting list of threads...')

    db = sqlite3.connect(constants.database_path)
    try:
        threads = db.execute(
            'SELECT original_url, board, thread_no, last_refreshed '
            'FROM threads WHERE is_404 = 0').fetchall()
    finally:
        db.close()

    print('Refreshing all threads...')

    for url, board, thread_no, last_refreshed in threads:
        last_refreshed = int(last_refreshed)
        now = helpers.timestamp_now()

        # The refresh stamp is written before the request, as in the original flow.
        _run_thread_update('UPDATE threads SET last_refreshed=? WHERE original_url=?',
                           (now, url))

        r = requests.get(helpers.generate_thread_json_url(board, thread_no))

        if r.status_code == requests.codes.ok:
            j = r.json()
            posts = j['posts']

            _run_thread_update('UPDATE threads SET last_reply_time=? WHERE original_url=?',
                               (posts[-1]['last_modified'], url))

            last_modified = int(posts[-1]['last_modified'])
            if last_modified > last_refreshed:
                print('{}/{} has new posts!'.format(board, thread_no))

                # The first post is excluded from the candidates, so this list
                # can be empty even though the thread changed (e.g. the thread
                # consists of the first post only).  min() on an empty list
                # raises ValueError, hence the explicit guard.
                new_replies = [i for i in posts[1:] if int(i['last_modified']) > last_refreshed]
                if new_replies:
                    oldest_new_reply_no = min(new_replies, key=lambda x: int(x['last_modified']))['no']
                else:
                    oldest_new_reply_no = None

                _run_thread_update('UPDATE threads SET new_replies=1, oldest_new_reply=? WHERE original_url=?',
                                   (oldest_new_reply_no, url))

                dump_thread(r.text, j, board, str(thread_no), str(last_modified))
        elif r.status_code == 404:
            print('{}/{} has gone 404.'.format(board, thread_no))

            _run_thread_update('UPDATE threads SET last_refreshed = ?, is_404 = 1 WHERE original_url = ?',
                               (now, url))

    print('Done!')


def _run_thread_update(sql, params):
    """Execute one parameterised write statement and commit it.

    The connection is closed even when the statement raises.
    """
    db = sqlite3.connect(constants.database_path)
    try:
        db.execute(sql, params)
        db.commit()
    finally:
        db.close()
|
|
|
|
|
|
|
|
|
|
|
def list_threads():
    """Print a table of all tracked threads.

    Each row shows a ``!`` marker when ``new_replies`` is 1, an ``x`` marker
    when ``is_404`` is 1, the thread URL, and how long ago the last reply,
    the last viewing and the last refresh happened.  A timestamp that is
    NULL in the database (``last_reply_time`` is not set when a thread is
    first inserted) is shown as ``never`` instead of raising ``TypeError``.
    """
    db = sqlite3.connect(constants.database_path)
    try:
        threads = db.execute(
            'SELECT original_url, last_seen, new_replies, is_404, last_refreshed, last_reply_time '
            'FROM threads').fetchall()
    finally:
        db.close()

    now = helpers.timestamp_now()

    def age(timestamp):
        # NULL columns come back as None; subtracting None from `now` would raise.
        if timestamp is None:
            return 'never'
        return humanize.naturaldelta(now - int(timestamp))

    format_string = '{}{} {:<50}{:<20}{:<20}{:<20}'
    print((format_string).format(' ', ' ', 'Thread URL', 'Last reply', 'Last seen', 'Refreshed'))
    print('-----------------------------------------------------------------------------------------------------------------')

    for url, last_seen, new_replies, is_404, last_refreshed, last_reply_time in threads:
        s = format_string.format(
            '!' if str(new_replies) == '1' else ' ',
            'x' if str(is_404) == '1' else ' ',
            url,
            age(last_reply_time),
            age(last_seen),
            age(last_refreshed)
        )

        print(s)
|
|
|
|
|
|
|
|
|
|
|
def open_threads_with_new_replies():
    """Open every non-404 thread flagged with new replies in the web browser.

    The opened URL is anchored at ``#q<oldest_new_reply>`` when that column
    holds a value.  After opening, the row's ``last_seen`` is set to the
    current time and ``new_replies`` is reset to 0.  Threads without new
    replies are left untouched.
    """
    db = sqlite3.connect(constants.database_path)
    try:
        threads = db.execute('SELECT '
                             'original_url, '
                             'board, '
                             'thread_no, '
                             'oldest_new_reply, '
                             'new_replies '
                             'FROM threads WHERE is_404 = 0').fetchall()
    finally:
        db.close()

    for url, board, thread_no, oldest_new_reply, new_replies in threads:
        if not new_replies:
            continue

        now = helpers.timestamp_now()

        thread_url = helpers.generate_thread_html_url(board, str(thread_no))
        if oldest_new_reply:
            thread_url += '#q{}'.format(oldest_new_reply)

        webbrowser.open(thread_url)

        db = sqlite3.connect(constants.database_path)
        try:
            db.execute('UPDATE threads SET last_seen=?, new_replies=0 WHERE original_url=?', (now, url))
            db.commit()
        finally:
            # Close the connection even when the update fails.
            db.close()
|
|
|
|
|
|
|
|
|
|
|
def initialize_database():
    """Create the cache directory and a fresh database holding an empty ``threads`` table.

    If a database file already exists the user is asked whether to overwrite
    it.  On confirmation the old file is renamed to
    ``<database_path>.backup_<timestamp>``; otherwise the program exits.

    :raises SystemExit: when the user declines to overwrite an existing database.
    """
    if not os.path.exists(constants.cache_path):
        # makedirs also creates missing parent directories; exist_ok covers the
        # case where the directory appears between the check and the call.
        os.makedirs(constants.cache_path, exist_ok=True)

    if os.path.exists(constants.database_path):
        print('Table already exists.', end=' ')
        if click.confirm('Overwrite? (old file will be backed up)'):
            os.rename(constants.database_path, '{}.backup_{}'.format(constants.database_path, helpers.timestamp_now()))
        else:
            # The exit() builtin is installed by the site module for interactive
            # use; raising SystemExit has the same effect and is always available.
            raise SystemExit

    # Creates database if it doesn't exist (which we have just ensured)
    db = sqlite3.connect(constants.database_path)
    try:
        db.execute('CREATE TABLE threads('
                   'original_url TEXT PRIMARY KEY, '
                   'board TEXT, '
                   'thread_no INTEGER, '
                   'anchored_reply INTEGER, '
                   'last_refreshed INTEGER, '
                   'last_seen INTEGER, '
                   'new_replies INTEGER, '
                   'oldest_new_reply STRING, '
                   'last_reply_time INTEGER,'
                   'is_404 INTEGER)')
        db.commit()
    finally:
        db.close()
|
|
|
|
|
|
|
|
|
|
|
def add_thread_to_db(url):
    """Record a thread in the database and dump its current contents.

    A URL that is already recorded (primary-key clash) leaves the database
    unchanged; the thread is dumped in either case.

    :param url: URL of the thread, as understood by ``helpers.parse_url``.
    """
    board, thread_no, anchored_reply = helpers.parse_url(url)

    insert_sql = ('INSERT INTO threads(original_url, board, thread_no, anchored_reply, last_seen, last_refreshed, new_replies, is_404) '
                  'VALUES(?,?,?,?,?,?,0,0)')

    connection = sqlite3.connect(constants.database_path)
    try:
        # The connection's context manager commits on success and rolls back on error.
        with connection:
            stamp = helpers.timestamp_now()
            row = (url, board, thread_no, anchored_reply, stamp, stamp)
            connection.execute(insert_sql, row)
            print('Thread added.')
    except sqlite3.IntegrityError:
        print('This URL is already recorded. The database will not be altered but the thread will be dumped.')
    finally:
        connection.close()

    dump_thread_by_url(url)
|
|
|
|
|
|
|
|
|
|
|
def dump_thread_by_url(url):
    """Fetch a thread's JSON and dump it, stamped with the current time.

    :param url: URL of the thread, as understood by ``helpers.parse_url``.
    """
    # The third element returned by parse_url is not needed here.
    board, thread_no, _ = helpers.parse_url(url)

    json_url = helpers.generate_thread_json_url(board, thread_no)
    response = requests.get(json_url)
    payload = response.json()
    stamp = str(helpers.timestamp_now())

    dump_thread(response.text, payload, board, thread_no, stamp)
|
|
|
|
|
|
|
|
|
|
|
def remove_thread_by_id(board, thread_no):
    """Delete every database row that matches the given board and thread number.

    :param board: Board name to match against the ``board`` column.
    :param thread_no: Thread number to match against the ``thread_no`` column.
    """
    print('Removing /{}/{} ...'.format(board, thread_no))

    key = (board, thread_no)
    connection = sqlite3.connect(constants.database_path)
    cur = connection.cursor()

    # Look the rows up first so the number of affected entries can be reported.
    cur.execute('SELECT original_url FROM threads WHERE board = ? AND thread_no = ?', key)
    matching = cur.fetchall()
    print('Deleting {} entries.'.format(len(matching)))

    cur.execute('DELETE FROM threads WHERE board = ? AND thread_no = ?', key)

    connection.commit()
    connection.close()
|
|
|
|
|
|
|
|
|
|
|
def dump_thread(raw_json, parsed_json, board, thread_no, last_seen):
    """Dumps the JSON of the thread with a timestamp, as well as all the files in it (if they don't already exist).

    The JSON is written to ``<cache_path>/<board>/<thread_no>/<last_seen>.json``,
    the thread's HTML page is downloaded next to it as ``<last_seen>.html``,
    and attachments go into the ``files`` subdirectory.

    :param raw_json: Thread JSON as text, written to disk unchanged.
    :param parsed_json: The same JSON parsed; its ``posts`` list is scanned for files.
    :param board: Board name, used as a directory name.
    :param thread_no: Thread number, used as a directory name.
    :param last_seen: Timestamp used as the base name of the dumped files.
    """
    # Accept non-string values as well; os.path.join and `+` need strings.
    thread_name = str(thread_no)
    stamp = str(last_seen)

    thread_dir = os.path.join(constants.cache_path, board, thread_name)
    files_dir = os.path.join(thread_dir, 'files')
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(files_dir, exist_ok=True)

    # Write UTF-8 explicitly: the platform default encoding may be unable to
    # represent characters that occur in the thread text.
    with open(os.path.join(thread_dir, stamp + '.json'), 'w', encoding='utf-8') as f:
        f.write(raw_json)

    wget.download(helpers.generate_thread_html_url(board, thread_no), os.path.join(thread_dir, stamp + '.html'))
    print('')  # wget fails to start a new line, mangling subsequent text output

    for p in parsed_json['posts']:
        if 'filename' in p:
            dump_file(p['tim'], p['ext'], p['filename'], files_dir)

        for i in p.get('extra_files', ()):
            dump_file(i['tim'], i['ext'], i['filename'], files_dir)
|
|
|
|
|
|
|
def dump_file(tim, ext, filename, files_dir):
    """Downloads a file from 8chan (unless it already exists at the given location).

    The file is stored under its original name.  Only the final path
    component of that name is used, so a name containing directory
    separators cannot place the file outside ``files_dir``.

    :param ext: Extension of the file including dot.
    :param filename: Original name of the file without extension.
    :param files_dir: Directory where the file should go. Will be created if it doesn't exist.
    :param tim: tim property from JSON; also the 8ch filename (without the extension).
    :return: None
    """
    filename_8ch = tim + ext
    # `filename` comes from the remote thread JSON; strip any directory part.
    # Fall back to the 8ch name if nothing usable is left.
    filename_original = os.path.basename(filename + ext) or filename_8ch
    url = helpers.generate_file_url(filename_8ch)

    # Honour the documented contract that the target directory is created on demand.
    os.makedirs(files_dir, exist_ok=True)

    target = os.path.join(files_dir, filename_original)
    if not os.path.exists(target):
        print('\tDownloading file {} ...'.format(url))
        wget.download(url, target)
        print('')  # wget fails to start a new line, mangling subsequent text output