#!/usr/bin/env python2
"""Usage: dl.py [-s SUBJECT] [options] THREAD

-d --directory SAVEPATH  Save images to a directory in SAVEPATH
-s --subject SUBJECT     If set, don't ask for a folder name, but directly save
                         in SUBJECT. If not set, 8changrab tries the subject
                         of the thread, and asks otherwise.
--workers=<num>          Number of processes to spawn [default: 10]
--original-names         Save images under their original post filenames
-h --help                Show this help
-v --version             Show version

"""
from __future__ import print_function
import sys
import os
import urllib2
import shutil

from docopt import docopt
from bs4 import BeautifulSoup
from os.path import expanduser
from multiprocessing import Pool
from multiprocessing import Value

VERSION = "8changrab 0.2"
DEFAULT_SAVE_PATH = '{}/8chan'.format(expanduser("~"))


def pretty_update_progress(current, total):
    """Create a pretty progress bar by constantly updating one line"""
    progress_bar_length = 40
    percent_finished = current / float(total)  # 0 -> 1
    fillers = int(percent_finished * progress_bar_length)
    empty_fillers = progress_bar_length - fillers
    progress_bar = "["
    progress_bar += fillers * "="
    progress_bar += ">"
    progress_bar += empty_fillers * " "
    progress_bar += "]"
    term = Terminal()
    with term.location(x=0):
        print("{} {}/{}".format(progress_bar, current, total), end="")
        sys.stdout.flush()


def simple_update_progress(current, total):
    """Print one bar character per finished download (fallback for non-ttys)"""
    sys.stdout.write("|")
    sys.stdout.flush()


# Use dumb output on non-tty by default
update_progress = simple_update_progress
try:
    # If available, use pretty terminal output
    from blessings import Terminal
    TERM = Terminal()
    if TERM.is_a_tty:
        update_progress = pretty_update_progress
except ImportError:
    pass

HDR = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
       }


def download_image(link, filename):
    """Download LINK to FILENAME if FILENAME doesn't already exist"""
    if os.path.exists(filename):
        return
    try:
        req = urllib2.Request(link, headers=HDR)
        response = urllib2.urlopen(req)
        with open(filename, "wb") as _file:
            _file.write(response.read())
    except KeyboardInterrupt:
        # Remove unfinished download
        if os.path.exists(filename):
            os.remove(filename)


def download_and_update_progress(image):
    """Spawned as a separate process to download IMAGE"""
    try:
        link, filename = image
        download_image(link, filename)
        with COUNTER.get_lock():
            COUNTER.value += 1
            update_progress(COUNTER.value, TOTAL_COUNT.value)
    except KeyboardInterrupt:
        pass


def main(argv):
    """Grabs images from an 8chan thread"""
    args = docopt(__doc__, argv=argv[1:], version=VERSION)
    url = args['THREAD']
    savepath = args['--directory'] or DEFAULT_SAVE_PATH
    if "8ch.net" not in url:
        print("Not an 8chan URL")
        return 1
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    req = urllib2.Request(url, headers=HDR)
    page = urllib2.urlopen(req).read()
    soup = BeautifulSoup(page)
    if args['--subject']:
        topic = args['--subject']
    else:
        subject = soup.find(attrs={"class": "subject"})
        if subject:
            topic = subject.string
        else:
            topic = raw_input('Please specify folder name: ')
    workers = int(args['--workers'])
    download_path = '{}/{}'.format(savepath, topic)
    if not os.path.exists(download_path):
        os.makedirs(download_path)
    print("Downloading to {}".format(download_path))
    fileinfos = soup.find_all(attrs={"class": "fileinfo"})
    # Global values used in download_and_update_progress()
    global TOTAL_COUNT
    TOTAL_COUNT = Value('i', 0)
    global COUNTER
    COUNTER = Value('i', 0)
    TOTAL_COUNT.value = len(fileinfos)
    # Create a list of (link, target path) pairs to download
    downloads = []
    for fileinfo in fileinfos:
        if args['--original-names']:
            download_link = ""
            save_name = ""
            for link in fileinfo.find_all('a'):
                download_link = link.get('href')
            for sp in fileinfo.find_all('span', class_="postfilename"):
                save_name = unicode(sp.string)
            downloads.append((download_link, '%s/%s' % (download_path, save_name)))
        else:
            for link in fileinfo.find_all('a'):
                download_link = link.get('href')
                downloads.append((download_link, '%s/%s' % (download_path, link.string)))
    # Use a pool of processes to download the images in the list
    pool = Pool(workers)
    p = pool.map_async(download_and_update_progress, downloads)
    # Wait for downloads to complete
    try:
        # The (huge) timeout is needed so that KeyboardInterrupt actually
        # reaches the main process while it waits (Python 2 quirk of get())
        results = p.get(0xFFFF)
    except KeyboardInterrupt:
        print("Aborting")
        pool.terminate()
        pool.join()
    else:
        pool.close()
        pool.join()
    print()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
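
Running the script requires docopt and BeautifulSoup (bs4) under Python 2; blessings is optional and only enables the single-line progress bar. A typical invocation might look like the following, where the thread URL is just a placeholder:

    python2 dl.py --workers=5 -s wallpapers https://8ch.net/b/res/12345.html

With -s given as above, the images are saved to ~/8chan/wallpapers (or SAVEPATH/wallpapers if -d is also passed); without -s, the folder name is taken from the thread subject or asked for interactively.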