Downloader for a certain Mongolian basket weaving forum
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

down.py 3.3KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. import requests
  2. import json
  3. import re
  4. import sys
  5. import getopt
  6. import os.path
  7. import time
  def downloadBySearch(pattern, field, board, path="./"):
      """Download the images of every catalog thread whose field matches pattern.

      Fetches the board's ``catalog.json``, runs ``re.search(pattern, ...)``
      on ``field`` of each thread that has that key, and for every match
      fetches the thread's JSON and passes each of its posts to ``getImg``.

      Args:
          pattern: Regular expression handed to ``re.search``.
          field: Key of the catalog thread object to test (the script's
              own debug print uses ``'com'``).
          board: Board name used to build the API URLs.
          path: Destination prefix handed through to ``getImg``.
      """
      print(board)
      catalog_url = 'https://a.4cdn.org/' + board + '/catalog.json'
      print(catalog_url)
      catalog = requests.get(catalog_url).json()
      for page in catalog:
          for thread in page.get('threads') or []:
              value = thread.get(field)
              if value is None or not re.search(pattern, value):
                  continue
              # A thread matched on another field may carry no 'com' key;
              # concatenating None here used to raise TypeError.
              content = thread.get('com') or ''
              print("Download thread No:" + str(thread.get('no')) + "\nContent: " + content)
              thread_url = 'https://a.4cdn.org/' + board + '/thread/' + str(thread.get('no')) + ".json"
              for post in requests.get(thread_url).json().get('posts') or []:
                  getImg(post, board, path)
  def getImg(post, board, path):
      """Save the image attached to ``post`` under ``path`` if not yet on disk.

      Posts without a ``'tim'`` value carry no image and are ignored. The
      target file is named ``<filename><ext>`` from the post and the image
      is fetched from ``https://i.4cdn.org/<board>/<tim><ext>``.

      Args:
          post: Mapping describing one post; reads ``'tim'``, ``'ext'``,
              ``'filename'`` and ``'no'``.
          board: Board name used to build the image URL.
          path: Directory the file is written to; an empty string means the
              current working directory.

      Returns:
          None.
      """
      if post.get('tim') is None:
          return
      ext = post.get('ext')
      # basename(): the name is taken verbatim from the post data, so strip
      # any directory components that would let it escape ``path``.
      filename = os.path.basename(post.get('filename')) + ext
      # join() so that a path given without a trailing separator still
      # names a file inside that directory.
      filepath = os.path.join(path, filename)
      # Check the disk before touching the network: callers re-submit the
      # same posts repeatedly and must not re-download existing files.
      if os.path.isfile(filepath):
          return
      response = requests.get('https://i.4cdn.org/' + board + '/' + str(post.get('tim')) + ext)
      if response.status_code != 200:
          # Do not store an error page under an image file name.
          print("Could not download image from post No:" + str(post.get('no'))
                + " (HTTP " + str(response.status_code) + ")")
          return
      print("Downloading image from post No:" + str(post.get('no')) + " in: " + filepath)
      with open(filepath, "wb") as f:
          f.write(response.content)
  def downloadThreadImages(thread, board, path="./"):
      """Fetch one thread's JSON and hand every post in it to ``getImg``.

      Args:
          thread: Thread number as a string (it is concatenated into the URL).
          board: Board name used to build the API URL.
          path: Destination prefix passed through to ``getImg``.
      """
      thread_url = 'https://a.4cdn.org/' + board + '/thread/' + thread + ".json"
      payload = requests.get(thread_url).json()
      entries = payload.get('posts')
      for entry in entries:
          getImg(entry, board, path)
  def watchThread(thread, board, path="./"):
      """Download a thread's images, then poll it forever for new ones.

      Performs one full fetch of the thread, then every 10 seconds sends a
      conditional request with an ``If-Modified-Since`` header. A 200
      response is treated as new content and all of its posts are passed to
      ``getImg``; any other status is ignored. This function does not return.

      Args:
          thread: Thread number as a string (it is concatenated into the URL).
          board: Board name used to build the API URL.
          path: Destination prefix passed through to ``getImg``.
      """
      # Local import keeps this block self-contained.
      from email.utils import formatdate

      def next_timestamp(response):
          # Prefer the server's own Last-Modified value so that clock skew
          # or posts arriving while the response was in flight cannot be
          # skipped; fall back to the local clock as an HTTP-date, which
          # formatdate() renders in English regardless of the locale.
          return response.headers.get('Last-Modified') or formatdate(usegmt=True)

      url = 'https://a.4cdn.org/' + board + '/thread/' + thread + '.json'
      response = requests.get(url)
      timestamp = next_timestamp(response)
      for post in response.json().get('posts'):
          getImg(post, board, path)
      while True:
          # Sleep first so the initial fetch is not followed immediately by
          # a second request for the same thread.
          time.sleep(10)
          print('Refreshing thread No: ' + thread)
          response = requests.get(url, headers={"If-Modified-Since": timestamp})
          if response.status_code == 200:
              print("New posts since " + timestamp)
              timestamp = next_timestamp(response)
              for post in response.json().get('posts'):
                  getImg(post, board, path)
  def main(argv):
      """Parse command-line options and run the requested download.

      Options (in any order):
          -b/--board <board>     board to read (always required)
          -n/--number <thread>   download the images of one thread
          -w                     with -n: keep watching the thread
          -s/--search <regex>    with -f: download every matching thread
          -f/--field <field>     catalog field the regex is applied to
          -p/--path <path>       destination handed to the downloaders

      A thread number takes precedence over a search. The process exits
      with status 0 after a thread download and with status 2 when the
      options are invalid or insufficient.

      Args:
          argv: Argument list without the program name.
      """
      usage = ("Usage: -b <board> [-p <path>] "
               "(-n <thread number> [-w <watch (daemon)>] | -s <regex pattern> -f <field>)")
      pattern = ''
      field = ''
      board = ''
      path = ''
      thread = ''
      # Must be initialised: "-n" without "-w" used to raise UnboundLocalError.
      watch = False
      try:
          opts, _ = getopt.getopt(
              argv, "wb:n:s:f:p:",
              ["board=", "number=", "search=", "field=", "path="])
      except getopt.GetoptError:
          print(usage)
          sys.exit(2)
      # Only collect values here and act after the loop, so the order in
      # which the options are given no longer matters.
      for opt, arg in opts:
          if opt == "-w":
              watch = True
          elif opt in ("-b", "--board"):
              board = arg
          elif opt in ("-p", "--path"):
              path = arg
          elif opt in ("-n", "--number"):
              # The first thread number wins, as it did before.
              if thread == '':
                  thread = arg
          elif opt in ("-s", "--search"):
              pattern = arg
          elif opt in ("-f", "--field"):
              field = arg
      if board != '' and thread != '':
          if watch:
              watchThread(thread, board, path)
          else:
              downloadThreadImages(thread, board, path)
          sys.exit(0)
      if board != '' and pattern != '' and field != '':
          downloadBySearch(pattern, field, board, path)
          return
      # Nothing runnable was requested: say so instead of exiting silently.
      print(usage)
      sys.exit(2)
  # Script entry point: forward everything after the program name to main().
  if __name__ == "__main__":
      cli_args = sys.argv[1:]
      main(cli_args)