You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

filesfm.py 8.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. from sys import version_info
  2. from bs4 import BeautifulSoup
  3. from urllib.parse import urlparse, unquote, urlencode
  4. import shutil, cgi, os, urllib.request, argparse, sys, re, json, gettext
  5. gettext.install('messages', localedir='po')
  6. #global variables
  7. base_folder = 'fmloader/'
  8. version = '0.0.5'
  9. #end global variables
  10. #declarative jumbo
  11. try:
  12. assert version_info >= (3,6)
  13. except:
  14. print(_("you must have python 3.6 or higher installed\nthe script was intended to run on 3.6 or higher and won't account for python2 at all as well!"))
  15. sys.exit()
  16. #end declarative jumbo
  17. def check_folder(folder):
  18. try:
  19. os.mkdir(folder)
  20. except FileExistsError:
  21. pass #¯\_(ツ)_/¯
  22. def _declare_opener(additional_header=None): #do not touch, it works as is, not that it needs better user-agents >.>
  23. opener = urllib.request.build_opener()
  24. if additional_header is None:
  25. opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0')]
  26. else:
  27. opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0')] + additional_header
  28. urllib.request.install_opener(opener)
  29. def _download_file(link, file_link, additional_header=None, additional_fold=''): #original file link for the referrer, file_link is actual download link
  30. try:
  31. if additional_header is not None:
  32. _declare_opener([('Referer', link),additional_header])
  33. else:
  34. _declare_opener([('Referer', link),])
  35. with urllib.request.urlopen(file_link) as response:
  36. folder = base_folder+additional_fold
  37. Buffy = folder.split('/')
  38. buf = ''
  39. for i in range(len(Buffy)-1):
  40. buf += Buffy[i] + '/'
  41. check_folder(buf)
  42. try:
  43. _, params = cgi.parse_header(response.headers.get('Content-Disposition', ''))
  44. print('Downloading file '+params['filename']+'...')
  45. out_file = open(folder+params['filename'], 'wb')
  46. except KeyError:
  47. filename = unquote(os.path.basename(urlparse(file_link).path))
  48. print('Downloading file '+filename+'...')
  49. out_file = open(folder+filename, 'wb')
  50. shutil.copyfileobj(response, out_file)
  51. except ValueError:
  52. raise ValueError
  53. print(_('Something\'s wrong with the link download routine'))
  54. return -1
  55. else:
  56. print(_('Done!'))
  57. return 0
  58. def __zippyshare(parsed,baseurl):
  59. try:
  60. result = parsed.find('div', class_='right').find('script').string.split(' ')[6:17]
  61. for i in range(len(result)): result[i] = result[i].replace('(','').replace(')','').replace('\"','').replace(';','').replace('\n','')
  62. href = result[0] + str( (int(result[2]) % int(result[4])) + (int(result[6]) % int(result[8])) ) + result[10]
  63. return [1,'http://' + baseurl + href]
  64. except:
  65. return [0,'']
  66. def __filesfm_folder(parsed, link):
  67. file_array = []
  68. counter = 1
  69. for i in parsed.find_all('div', class_='file'):
  70. file_array.append([i.get('title').strip().split('<br/>'),i.find('div', class_='item_name').get('onclick').replace('\'',"")[16:]])
  71. print(str(counter)+' - '+file_array[-1][0][0]+' -- '+file_array[-1][0][1][:-1])
  72. counter = counter+1
  73. return file_array
  74. def __filesfm_file(parsed):
  75. result = [0,'']
  76. try:
  77. result[1] = (parsed.find('div', class_='drop-down-options').find('a').get('href'))
  78. except:
  79. #print(_("Required div element not found or incorrect site"))
  80. return result
  81. if result[1].startswith('//'):
  82. if args.interactive:
  83. result[0] = 2
  84. else:
  85. result = [1,str('https://'+result[1][2:])]
  86. elif result[1].startswith('/d'):
  87. result = [1,str('https://files.fm'+result[1])]
  88. return result
  89. def get_json(link, post=None):
  90. if post is not None:
  91. data = urlencode(post)
  92. data = bytearray(data, 'utf-8')
  93. req = urllib.request.Request(link,data)
  94. request = urllib.request.urlopen(req)
  95. else:
  96. request = urllib.request.urlopen(link)
  97. return json.loads(request.read().decode('utf-8'))
  98. def __mediafire_crawler(folder_key, folder, link):
  99. folder_folders = get_json('https://www.mediafire.com/api/1.4/folder/get_content.php?r=mgsp&content_type=folders&filter=all&order_by=name&order_direction=asc&chunk=1&version=1.5&folder_key='+folder_key+'&response_format=json')
  100. folder_files = get_json('https://www.mediafire.com/api/1.4/folder/get_content.php?r=plqv&content_type=files&filter=all&order_by=name&order_direction=asc&chunk=1&version=1.5&folder_key='+folder_key+'&response_format=json')
  101. folders = folder_folders['response']['folder_content']['folders']
  102. files = folder_files['response']['folder_content']['files']
  103. buf = []
  104. if folders is not None:
  105. for i in folders:
  106. buf += __mediafire_crawler(i['folderkey'],folder+i['name']+'/', link)
  107. for i in files:
  108. file = __mediafire_file(url2bs(i['links']['normal_download']),i['links']['normal_download'])[1]
  109. buf.append([i['filename'],file,folder])
  110. _download_file(link, file, additional_fold=folder)
  111. return buf
  112. def __mediafire_folder(parsed, link):
  113. folder_key = link[link.find('folder')+7:link.rfind('/')]
  114. _declare_opener([('Referer', link),('Content-Type','application/json')])
  115. folder_info = get_json('https://www.mediafire.com/api/1.4/folder/get_info.php',(('folder_key', folder_key), ('response_format', 'json'), ('recursive','yes')))
  116. __mediafire_crawler(folder_key, folder_info['response']['folder_info']['name']+'/',link)
  117. return 0
  118. def __mediafire_file(parsed, link):
  119. if link.find('folder') != -1:
  120. return [2,'']
  121. result = (parsed.find('div',class_='download_link').find('script'))
  122. regex = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
  123. result = re.findall(regex, result.string)
  124. if len(result) == 1:
  125. return [1,result[0]]
  126. else:
  127. return [0,'']
def _parse_files(parsed, link, __func_host): #this function is intended for folders only, but it may work for single files as well
    # Download the files listed by a host-specific folder handler.
    #
    # parsed      -- BeautifulSoup tree of the folder page
    # link        -- original folder URL, passed through as the Referer
    # __func_host -- folder handler; returns a list of [metadata, direct_url]
    #                entries, or 0 when it already performed the downloads
    #                itself (__mediafire_folder does this)
    # NOTE(review): `args` is the module-global namespace parsed in __main__;
    # the interactive branch is checked BEFORE the file_array == 0 sentinel,
    # so in interactive mode a 0 return still falls into the prompt loop.
    file_array = __func_host(parsed, link) #['filename','directurl']
    input_invalid = True
    if args.interactive:
        # keep prompting until the whole comma-separated list parses as ints
        while input_invalid:
            user_input = input('\n' + _('Select your desired files by their indexes, comma separated (ex. 1,3,4): '))
            try:
                user_input = user_input.replace(' ',"").split(',')
                for i in range(len(user_input)):
                    user_input[i] = int(user_input[i])-1  # 1-based display -> 0-based index
            except:
                print('Your input was invalid, try again')
            else:
                input_invalid = False
                for i in user_input:
                    try:
                        pull = file_array[i][1]
                    except:
                        # out-of-range or otherwise unusable index: skip, keep going
                        print(_('Errored index, continuing...'))
                        continue
                    _download_file(link,pull)
    elif file_array == 0:
        # handler already downloaded everything; nothing left to do
        return 0
    else:
        # non-interactive: fetch every file in the listing
        for i in file_array:
            _download_file(link, i[1])
  154. def url2bs(link): #simple, feed a link, poop a beautifulsoup object
  155. try:
  156. parsed = urllib.request.urlopen(link)
  157. parsed_bs = BeautifulSoup(parsed.read().decode('utf-8'), 'html.parser')
  158. except ValueError:
  159. print(_("Incorrect URL typed in, please copy and paste the link from the browser bar if possible! (recommended formatting: https://files.fm/u/{FILEID} )"))
  160. return -1
  161. else:
  162. return parsed_bs
  163. def parse_link(link): #core function, if you'll ever intend to import it obv. feed it with a files.fm link
  164. check_folder(base_folder)
  165. _declare_opener()
  166. parsed = url2bs(link)
  167. try:
  168. url = urlparse(link)
  169. except:
  170. raise ValueError
  171. if url.netloc.endswith('files.fm'): result = __filesfm_file(parsed); __func_host = __filesm_folder
  172. elif url.netloc.endswith('.zippyshare.com'): result = __zippyshare(parsed, url.netloc)
  173. elif url.netloc.endswith('.mediafire.com'): result = __mediafire_file(parsed, link); __func_host = __mediafire_folder
  174. else: print('wrong url'); return -1
  175. if result[0] == 0: print('url could not be retrieved'); return -1
  176. elif result[0] == 1:
  177. if len(result) == 3: return _download_file(link,result[1], headers)
  178. else: return _download_file(link,result[1])
  179. elif result[0] == 2: return _parse_files(parsed, link, __func_host)
  180. else: print(_('this should not happen')); return -1
  181. if __name__ == '__main__':
  182. parser = argparse.ArgumentParser(description=_('FilesFM python file&folder downloader'))
  183. parser.add_argument('--interactive','-i', action='store_true', help=_('Enables prompts done during the download\nEnable this to activate partial folder downloads'))
  184. parser.add_argument('urls', help=_('links to filehosts'), nargs='*', type=str)
  185. args = parser.parse_args()
  186. if len(sys.argv) > 1:
  187. for i in args.urls:
  188. parse_link(i)
  189. else:
  190. parser.print_help()
  191. sys.exit(1)