Browse Source

Refactored a lot of the file IO code so that all paths are defined in a centralized way. Fixes #17

master
Abrax 3 years ago
parent
commit
af36c9934e
6 changed files with 85 additions and 45 deletions
  1. 0
    3
      constants.py
  2. 13
    12
      db_methods.py
  3. 2
    3
      db_model.py
  4. 55
    6
      file_io.py
  5. 0
    9
      lizard.py
  6. 15
    12
      web_methods.py

+ 0
- 3
constants.py View File

@@ -1,5 +1,2 @@
lizard_data_directory = 'lizard_data'
files_directory = 'files'
orm_db_path = 'threads.orm.db'
user_agent = 'https://git.teknik.io/abrax/lizard'
conservative_refresh_criterion = 10

+ 13
- 12
db_methods.py View File

@@ -1,27 +1,28 @@
import os
import shutil
import webbrowser

import humanize

import constants
import db_model
import file_io
import time_methods
import web_methods
from constants import conservative_refresh_criterion
from db_model import Thread
from db_model import Thread, db
from file_io import path_to_database, ensure_directory_exists, timestamped_path_to_exported_threads_file
from json_methods import time_of_last_reply, get_oldest_reply
from web_methods import fetch_and_parse_thread_json, dump_thread_html, dump_thread_files, parse_url


def create_new_database():
    """Create the thread database unless a file already exists at its path.

    The database location comes from ``path_to_database()``; its parent
    directory is created first if needed. When a file already exists at that
    location, a message is printed and nothing else happens.
    """
    p = path_to_database()
    ensure_directory_exists(os.path.dirname(p))

    if os.path.exists(p):
        print('{} already exists, doing nothing.'.format(os.path.abspath(p)))
    else:
        # end=' ' keeps the trailing 'Done!' on the same output line.
        print('Creating database at {}...'.format(os.path.abspath(p)), end=' ')

        Thread.create_table()
        print('Done!')

@@ -29,15 +30,15 @@ def create_new_database():
def backup_database():
    """Copy the current database file to ``<database path>.<timestamp>``.

    The timestamp suffix comes from ``time_methods.timestamp_now()``. If no
    file exists at ``path_to_database()``, a message is printed and nothing is
    copied.
    """
    # Resolve the location once so every message and the copy agree on it.
    old_path = path_to_database()

    if os.path.exists(old_path):
        new_path = '{}.{}'.format(old_path, time_methods.timestamp_now())
        print('Backing up database to {}...'.format(new_path), end=' ')

        shutil.copy(old_path, new_path)
        print('Done!')
    else:
        print(old_path + " doesn't exist, doing nothing.")


def add_thread_to_db(url):
@@ -189,6 +190,6 @@ def export_thread_list():
threads = Thread.select()
print('Exporting {} threads.'.format(len(threads)))

with open(os.path.join('Exported {}.txt'.format(time_methods.timestamp_now())), 'w') as f:
with open(timestamped_path_to_exported_threads_file(), 'w') as f:
for t in threads:
f.write('lizard a "{}"\n'.format(t.original_url))

+ 2
- 3
db_model.py View File

@@ -1,8 +1,7 @@
from peewee import *
import constants

db = SqliteDatabase(constants.orm_db_path)
from file_io import path_to_database

db = SqliteDatabase(path_to_database())

class Thread(Model):
original_url = CharField()

+ 55
- 6
file_io.py View File

@@ -1,6 +1,7 @@
import os
from os.path import expanduser

from constants import files_directory
import time_methods


def save_file(file_path, file_contents):
@@ -25,6 +26,39 @@ def normalize_filename(f):
return normalized


def ensure_directory_exists(p):
    """Create the directory ``p`` (including missing parents) if it is absent.

    ``p`` is used as given; callers that hold a file path must pass its
    directory part themselves. Does nothing when ``p`` is already a directory.

    Raises:
        NotADirectoryError: if ``p`` exists but is a regular file.
    """
    if os.path.isfile(p):
        # Raising a bare string is itself a TypeError on Python 3 and would
        # hide this message, so raise a real exception carrying it.
        raise NotADirectoryError(
            "Was expecting {} to be a directory, but it was a file instead. Please submit stack trace to the issue "
            "tracker. ".format(p))

    # exist_ok avoids a crash if the directory appears between check and creation.
    os.makedirs(p, exist_ok=True)


def path_to_lizard_data():
    """Return the absolute path of the ``lizard_data`` directory under the user's home folder."""
    home = expanduser("~")
    return os.path.abspath(os.path.join(home, 'lizard_data'))


def path_to_database():
    """Return the path of the ``threads.orm.db`` file inside the lizard data directory."""
    return os.path.join(path_to_lizard_data(), 'threads.orm.db')


def path_to_thread_cache(board, thread_no):
    """Return the cache directory of a thread: ``<lizard data>/<board>/<thread_no>``."""
    data_root = path_to_lizard_data()
    return os.path.join(data_root, board, thread_no)


def path_to_cached_file(board, thread_no, normalized_filename):
    """Return the path of a cached file: ``<thread cache>/files/<normalized_filename>``."""
    thread_dir = path_to_thread_cache(board, thread_no)
    return os.path.join(thread_dir, 'files', normalized_filename)


def file_exists_in_cache(board, thread_no, normalized_filename):
p = path_to_cached_file(board, thread_no, normalized_filename)

@@ -32,13 +66,28 @@ def file_exists_in_cache(board, thread_no, normalized_filename):
return result


def path_to_cached_file(board, thread_no, normalized_filename):
tc = path_to_thread_cache(board, thread_no)
p = os.path.join(tc, files_directory, normalized_filename)
def generate_json_path(board, thread_no):
    """ Returns a path like /home/USER/lizard_data/b/123/123456.json.

    The filename incorporates the value of time_methods.timestamp_now(), so repeated calls to this method are
    likely to produce different results. """
    return os.path.join(
        path_to_thread_cache(board, thread_no),
        '{}.json'.format(time_methods.timestamp_now()),
    )


def generate_html_path(board, thread_no):
    """ Returns a path like /home/USER/lizard_data/b/123/123456.html.

    The filename incorporates the value of time_methods.timestamp_now(), so repeated calls to this method are
    likely to produce different results. """
    return os.path.join(
        path_to_thread_cache(board, thread_no),
        '{}.html'.format(time_methods.timestamp_now()),
    )


def path_to_thread_cache(board, thread_no):
p = os.path.join(board, thread_no)
def timestamped_path_to_exported_threads_file():
    """ Returns a path like /home/USER/lizard_data/Exported 123456.txt.

    The filename incorporates the value of time_methods.timestamp_now(), so repeated calls to this method are
    likely to produce different results. """
    export_name = 'Exported {}.txt'.format(time_methods.timestamp_now())
    return os.path.join(path_to_lizard_data(), export_name)

+ 0
- 9
lizard.py View File

@@ -1,9 +1,6 @@
#!/usr/bin/env python3
import os
import sys

import db_methods
from constants import lizard_data_directory


def help_info():
@@ -46,12 +43,6 @@ Available commands:


def main():
# Set working directory to be the specified path under the home folder
from os.path import expanduser
lizard_dir = os.path.join(expanduser("~"), lizard_data_directory)
if not os.path.exists(lizard_dir): os.makedirs(lizard_dir)
os.chdir(lizard_dir)

n = len(sys.argv)
if n < 2: help_info()
command = sys.argv[1]

+ 15
- 12
web_methods.py View File

@@ -1,32 +1,34 @@
import json
import os
import re

import requests
from file_io import path_to_thread_cache, save_file, file_exists_in_cache, normalize_filename, path_to_cached_file
from json_methods import extract_filenames

import time_methods
import constants
import time_methods
from file_io import *
from file_io import generate_json_path
from json_methods import extract_filenames


def fetch_and_parse_thread_json(board, thread_no):
    """Download a thread's JSON into its cache directory and return it parsed.

    The JSON is saved to the path produced by ``generate_json_path`` and then
    read back from disk.

    Returns:
        The decoded JSON document, or ``None`` when ``download_file`` reports
        that the download did not succeed.
    """
    url = generate_thread_json_url(board, thread_no)
    p = generate_json_path(board, thread_no)

    if not download_file(url, p):
        return None

    # Use a context manager so the file handle is closed after parsing.
    with open(p) as f:
        return json.load(f)


def dump_thread_html(board, thread_no):
    """Download a thread's HTML page into its cache directory.

    The destination comes from ``generate_html_path`` and is printed before
    the download starts. The return value of ``download_file`` is ignored.
    """
    print('Downloading HTML for /{}/{}...'.format(board, thread_no))

    url = generate_thread_html_url(board, thread_no)
    p = generate_html_path(board, thread_no)

    print('Downloading html to {}'.format(p))

    download_file(url, p)


def dump_thread_files(board, thread_no, thread_json):
@@ -66,7 +68,8 @@ def download_file(file_url, file_path):
elif r.status_code == 404:
return False
else:
raise 'Unexpected status code {} while trying to fetch {}'.format(r.status_code, file_url)
raise "Unexpected status code {} while trying to fetch {} - try opening in the browser, if that doesn't work " \
"submit an issue to the tracker.".format(r.status_code, file_url)


def generate_thread_json_url(board, thread_no):

Loading…
Cancel
Save