Bläddra i källkod

Initial commit.

master
Abrax 4 år sedan
incheckning
1a1c9da876
5 ändrade filer med 353 tillägg och 0 borttagningar
  1. 4
    0
      .gitignore
  2. 2
    0
      constants.py
  3. 38
    0
      helpers.py
  4. 69
    0
      lizard.py
  5. 240
    0
      logic.py

+ 4
- 0
.gitignore Visa fil

@@ -0,0 +1,4 @@
.idea/*
__pycache__/*
data/*
threads.db*

+ 2
- 0
constants.py Visa fil

@@ -0,0 +1,2 @@
cache_path = 'data'
database_path = 'threads.db' # Hardcoded for now since otherwise every other script must be told where the DB is, hurting usability

+ 38
- 0
helpers.py Visa fil

@@ -0,0 +1,38 @@
import datetime
import re
import time


def parse_url(url):
    """Extract the board name, thread number and optional anchored reply from a thread URL.

    :param url: Thread URL, e.g. 'https://8ch.net/b/res/123.html#q456'.
    :return: [board, thread_no, anchored_reply] as strings; anchored_reply is ''
             when the URL carries no anchor.
    """
    parts = url.split('#', 1)

    # Raw strings so '\w' / '\d' are regex escapes, not (deprecated) Python
    # string escapes; '/' needs no escaping in a regex.
    board, thread_no = re.findall(r'(\w+)/res/(\d+)', parts[0])[0]
    anchored_reply = '' if len(parts) < 2 else re.findall(r'q?(\d+)$', parts[1])[0]

    return [board, thread_no, anchored_reply]


def timestamp_now():
    """Return the current Unix timestamp truncated to whole seconds."""
    seconds = time.time()
    return int(seconds)


def timestamp_to_date(t):
    """Format Unix timestamp *t* as 'YYYY-MM-DD HH:MM:SS' in local time."""
    moment = datetime.datetime.fromtimestamp(t)
    return moment.strftime('%Y-%m-%d %H:%M:%S')


def generate_thread_json_url(board, thread_no):
    """Build the URL of a thread's JSON representation."""
    return 'https://8ch.net/{}/res/{}.json'.format(board, thread_no)


def generate_thread_html_url(board, thread_no):
    """Build the URL of a thread's HTML page."""
    return 'https://8ch.net/{}/res/{}.html'.format(board, thread_no)


def generate_file_url(filename):
    """Build the media-server URL for a stored file."""
    return 'https://media.8ch.net/file_store/{}'.format(filename)

+ 69
- 0
lizard.py Visa fil

@@ -0,0 +1,69 @@
#!/usr/bin/env python
import sys
from logic import *


def help_info():
    """Print usage information (plus mascot) and terminate the process via SystemExit."""
    print(""" _.-~` `~-.
_.--~~~---,.__ _.,;; . -=(@'`\\
.-` ``~~~~--~~` ';;; ____)
_.' '. ';;;;; '`_.'
.-~;` `\ ' ';;;;;__.~`
.' .' `'. | / /;''
\/ .---'''``) /'-._____.--'\ \\
_/| (` / /` `\ \__
', `/- \ \ __/ (_ /-\-\-`
`;'-..___) | `/-\-\-` Lizard
`-. .' the 8chan monitor
`~~~~``

Usage: ./lizard [command] [options]

Available commands:
l - Lists all threads in database. Legend: ! - new replies, x - 404'd.
r - Refresh all threads and update database.
o - Open all threads with new replies in the default browser. Will also update the "new replies"
field accordingly.
i - Initialize new database.
g - Get (download) thread and the files in it. Requires option (URL of the thread).
Example: "lizard g https://8ch.net/b/res/123.html"
a - Add thread to database. Requires option (URL of the thread). Will also attempt to download
the thread.
d - Remove all instances of a thread (board & number) from the database.
Example: "lizard d b 123" will remove thread >>>/b/123
""")
    # sys.exit instead of the site-provided exit(): same SystemExit, but works
    # even when the 'site' module is not loaded.
    sys.exit()


if __name__ == '__main__':
    # 'os' previously arrived only by leaking through 'from logic import *';
    # import it explicitly so this script survives changes to logic.py.
    import os

    os.chdir(sys.path[0])  # set working directory to this script's directory

    n = len(sys.argv)
    if n < 2:
        help_info()  # prints usage and exits
    command = sys.argv[1]

    # Dispatch on argument count first, then on the command letter;
    # any unknown combination falls back to the usage screen.
    if n == 2:
        if command == 'r':
            refresh_all_threads()
        elif command == 'l':
            list_threads()
        elif command == 'o':
            open_threads_with_new_replies()
        elif command == 'i':
            initialize_database()
        else:
            help_info()
    elif n == 3:
        if command == 'a':
            add_thread_to_db(sys.argv[2])
        elif command == 'g':
            dump_thread_by_url(sys.argv[2])
        else:
            help_info()
    elif n == 4:
        if command == 'd':
            remove_thread_by_id(sys.argv[2], sys.argv[3])
        else:
            help_info()
    else:
        help_info()

+ 240
- 0
logic.py Visa fil

@@ -0,0 +1,240 @@
import os
import sqlite3
import webbrowser
import click
import requests
import wget
import constants
import helpers
import humanize


def refresh_all_threads():
    """Re-fetch every non-404 thread, update its timestamps and flag new replies.

    For each live thread: records the refresh time, downloads the thread JSON,
    stores the last reply time, marks new replies (remembering the oldest one
    for browser anchoring) and dumps a snapshot. Threads answering 404 are
    flagged as dead.
    """
    print('Getting list of threads...')

    db = sqlite3.connect(constants.database_path)
    threads = db.execute('SELECT original_url, board, thread_no, last_refreshed FROM threads WHERE is_404 = 0').fetchall()
    db.close()

    print('Refreshing all threads...')

    for url, board, thread_no, last_refreshed in threads:
        last_refreshed = int(last_refreshed)
        now = helpers.timestamp_now()

        # One connection per thread iteration; try/finally guarantees it is
        # closed even if the HTTP request raises.
        db = sqlite3.connect(constants.database_path)
        try:
            db.execute('UPDATE threads SET last_refreshed=? WHERE original_url=?',
                       (now, url))
            db.commit()

            r = requests.get(helpers.generate_thread_json_url(board, thread_no))
            if r.status_code == requests.codes.ok:
                j = r.json()

                db.execute('UPDATE threads SET last_reply_time=? WHERE original_url=?',
                           (j['posts'][-1]['last_modified'], url))
                db.commit()

                last_modified = int(j['posts'][-1]['last_modified'])
                if last_modified > last_refreshed:
                    print('{}/{} has new posts!'.format(board, thread_no))

                    # posts[1:] skips the OP; only replies count as "new".
                    new_replies = [p for p in j['posts'][1:] if int(p['last_modified']) > last_refreshed]
                    if new_replies:
                        oldest_new_reply = min(new_replies, key=lambda p: int(p['last_modified']))
                        db.execute('UPDATE threads SET new_replies=1, oldest_new_reply=? WHERE original_url=?',
                                   (oldest_new_reply['no'], url))
                    else:
                        # Only the OP changed (e.g. an edit): min() on an empty
                        # list would raise ValueError, so skip the anchor.
                        db.execute('UPDATE threads SET new_replies=1 WHERE original_url=?', (url,))
                    db.commit()

                    dump_thread(r.text, j, board, str(thread_no), str(last_modified))
            elif r.status_code == 404:
                print('{}/{} has gone 404.'.format(board, thread_no))

                db.execute('UPDATE threads SET last_refreshed = ?, is_404 = 1 WHERE original_url = ?', (now, url))
                db.commit()
        finally:
            db.close()

    print('Done!')


def list_threads():
    """Print a table of all tracked threads with humanized ages.

    Legend: '!' marks threads with unseen replies, 'x' marks 404'd threads.
    """
    db = sqlite3.connect(constants.database_path)
    threads = db.execute('SELECT original_url, last_seen, new_replies, is_404, last_refreshed, last_reply_time FROM threads').fetchall()
    db.close()

    now = helpers.timestamp_now()

    def age(t):
        # Threads that were added but never refreshed have NULL timestamps;
        # 'now - None' used to raise TypeError here.
        return humanize.naturaldelta(now - int(t)) if t is not None else 'never'

    format_string = '{}{} {:<50}{:<20}{:<20}{:<20}'
    print(format_string.format(' ', ' ', 'Thread URL', 'Last reply', 'Last seen', 'Refreshed'))
    print('-----------------------------------------------------------------------------------------------------------------')

    for url, last_seen, new_replies, is_404, last_refreshed, last_reply_time in threads:
        print(format_string.format(
            '!' if new_replies else ' ',
            'x' if is_404 else ' ',
            url,
            age(last_reply_time),
            age(last_seen),
            age(last_refreshed),
        ))


def open_threads_with_new_replies():
    """Open every live thread with unseen replies in the default browser.

    Each opened URL is anchored to the oldest unseen reply when one is
    recorded; the thread is then marked as seen (new_replies cleared).
    """
    db = sqlite3.connect(constants.database_path)
    rows = db.execute('SELECT '
                      'original_url, '
                      'board, '
                      'thread_no, '
                      'oldest_new_reply, '
                      'new_replies '
                      'FROM threads WHERE is_404 = 0').fetchall()
    db.close()

    for url, board, thread_no, oldest_new_reply, new_replies in rows:
        if not new_replies:
            continue

        now = helpers.timestamp_now()
        target = helpers.generate_thread_html_url(board, str(thread_no))
        if oldest_new_reply:
            target += '#q{}'.format(oldest_new_reply)

        webbrowser.open(target)

        db = sqlite3.connect(constants.database_path)
        db.execute('UPDATE threads SET last_seen=?, new_replies=0 WHERE original_url=?', (now, url))
        db.commit()
        db.close()


def initialize_database():
    """Create the cache directory and a fresh 'threads' table.

    If a database file already exists, asks for confirmation and renames the
    old file to a timestamped backup instead of deleting it; declining aborts
    the process.
    """
    if not os.path.exists(constants.cache_path): os.mkdir(constants.cache_path)

    if os.path.exists(constants.database_path):
        print('Table already exists.', end=' ')
        if click.confirm('Overwrite? (old file will be backed up)'):
            os.rename(constants.database_path, '{}.backup_{}'.format(constants.database_path, helpers.timestamp_now()))
        else:
            exit()

    # Creates database if it doesn't exist (which we have just ensured)
    db = sqlite3.connect(constants.database_path)

    # 'STRING' is not a real SQLite type name (it gets NUMERIC affinity and can
    # silently coerce numeric-looking values), so oldest_new_reply is TEXT.
    db.execute('CREATE TABLE threads('
               'original_url TEXT PRIMARY KEY, '
               'board TEXT, '
               'thread_no INTEGER, '
               'anchored_reply INTEGER, '
               'last_refreshed INTEGER, '
               'last_seen INTEGER, '
               'new_replies INTEGER, '
               'oldest_new_reply TEXT, '
               'last_reply_time INTEGER, '
               'is_404 INTEGER)')
    db.commit()  # DDL autocommits by default, but be explicit before closing
    db.close()


def add_thread_to_db(url):
    """Record a thread URL in the database, then dump the thread to the cache.

    A duplicate URL is caught via the primary-key constraint; the thread is
    dumped either way.
    """
    board, thread_no, anchored_reply = helpers.parse_url(url)
    now = helpers.timestamp_now()

    db = sqlite3.connect(constants.database_path)
    try:
        # 'with db' commits the INSERT on success and rolls back on error.
        with db:
            db.execute('INSERT INTO threads(original_url, board, thread_no, anchored_reply, last_seen, last_refreshed, new_replies, is_404) '
                       'VALUES(?,?,?,?,?,?,0,0)', (url, board, thread_no, anchored_reply, now, now))
        print('Thread added.')
    except sqlite3.IntegrityError:
        print('This URL is already recorded. The database will not be altered but the thread will be dumped.')
    finally:
        db.close()

    dump_thread_by_url(url)


def dump_thread_by_url(url):
    """Fetch a thread's JSON by URL and write a timestamped dump to the cache."""
    board, thread_no, _ = helpers.parse_url(url)

    response = requests.get(helpers.generate_thread_json_url(board, thread_no))
    timestamp = str(helpers.timestamp_now())

    dump_thread(response.text, response.json(), board, thread_no, timestamp)


def remove_thread_by_id(board, thread_no):
    """Delete every database entry matching the given board and thread number."""
    print('Removing /{}/{} ...'.format(board, thread_no))

    db = sqlite3.connect(constants.database_path)

    # Report how many rows are about to go before deleting them.
    matches = db.execute('SELECT original_url FROM threads WHERE board = ? AND thread_no = ?', (board, thread_no)).fetchall()
    print('Deleting {} entries.'.format(len(matches)))

    db.execute('DELETE FROM threads WHERE board = ? AND thread_no = ?', (board, thread_no))
    db.commit()
    db.close()


def dump_thread(raw_json, parsed_json, board, thread_no, last_seen):
    """Save a snapshot of a thread: raw JSON, rendered HTML and all attachments.

    Attachments that already exist in the cache are skipped by dump_file.
    """
    thread_dir = os.path.join(constants.cache_path, board, thread_no)
    files_dir = os.path.join(thread_dir, 'files')
    if not os.path.exists(files_dir):
        os.makedirs(files_dir)

    with open(os.path.join(thread_dir, last_seen + '.json'), 'w') as json_file:
        json_file.write(raw_json)
    wget.download(helpers.generate_thread_html_url(board, thread_no), os.path.join(thread_dir, last_seen + '.html'))
    print('')  # wget fails to start a new line, mangling subsequent text output

    for post in parsed_json['posts']:
        if 'filename' in post:
            dump_file(post['tim'], post['ext'], post['filename'], files_dir)

        for extra in post.get('extra_files', []):
            dump_file(extra['tim'], extra['ext'], extra['filename'], files_dir)


def dump_file(tim, ext, filename, files_dir):
    """Download one attachment from 8chan unless it is already cached locally.

    :param tim: tim property from JSON; also the 8ch filename (without the extension).
    :param ext: Extension of the file including dot.
    :param filename: Original name of the file without extension.
    :param files_dir: Directory where the file should go.
    :return:
    """
    remote_name = tim + ext
    local_path = os.path.join(files_dir, filename + ext)
    url = helpers.generate_file_url(remote_name)

    if os.path.exists(local_path):
        return  # already cached; nothing to do

    print('\tDownloading file {} ...'.format(url))
    wget.download(url, local_path)
    print('')  # wget fails to start a new line, mangling subsequent text output

Laddar…
Avbryt
Spara