Browse Source

Added Mediafire.com folder download support; it's in beta (does not support partial folder downloads)

Added another bunch of development features (that will HAUNT YOU)
Fixed a couple of bugs with Mediafire.com file download
master
pleyar 5 years ago
parent
commit
92930be78a
  1. 1
      .gitignore
  2. 118
      filesfm.py

1
.gitignore vendored

@ -40,3 +40,4 @@ Thumbs.db @@ -40,3 +40,4 @@ Thumbs.db
################
__pycache__/
fmloader/

118
filesfm.py

@ -1,11 +1,11 @@ @@ -1,11 +1,11 @@
from sys import version_info
from bs4 import BeautifulSoup
from urllib.parse import urlparse, unquote
import shutil, cgi, os, urllib.request, argparse, sys, re
from urllib.parse import urlparse, unquote, urlencode
import shutil, cgi, os, urllib.request, argparse, sys, re, json
#global variables
base_folder = 'files.fm/'
version = '0.0.4'
base_folder = 'fmloader/'
version = '0.0.5'
#end global variables
#declarative jumbo
@ -30,7 +30,7 @@ def _declare_opener(additional_header=None): #do not touch, it works as is, not @@ -30,7 +30,7 @@ def _declare_opener(additional_header=None): #do not touch, it works as is, not
opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0')] + additional_header
urllib.request.install_opener(opener)
def _download_file(link, file_link, additional_header=None): #original file link for the referrer, file_link is actual download link
def _download_file(link, file_link, additional_header=None, additional_fold=''): #original file link for the referrer, file_link is actual download link
try:
if additional_header is not None:
_declare_opener([('Referer', link),additional_header])
@ -40,11 +40,19 @@ def _download_file(link, file_link, additional_header=None): #original file link @@ -40,11 +40,19 @@ def _download_file(link, file_link, additional_header=None): #original file link
try:
_, params = cgi.parse_header(response.headers.get('Content-Disposition', ''))
print('Downloading file '+params['filename']+'...')
out_file = open(base_folder+params['filename'], 'wb')
out_file = open(base_folder+additional_fold+params['filename'], 'wb')
except KeyError:
filename = unquote(os.path.basename(urlparse(file_link).path))
print('Downloading file '+filename+'...')
out_file = open(base_folder+filename, 'wb')
out_file = open(base_folder+additional_fold+filename, 'wb')
except FileNotFoundError:
folder = base_folder+additional_fold
folder = folder.split('/')
buf = ''
for i in range(len(folder)):
buf += folder[i] + '/'
check_folder(buf)
out_file = open(folder+params['filename'], 'wb')
shutil.copyfileobj(response, out_file)
except ValueError:
raise ValueError
@ -64,7 +72,7 @@ def __zippyshare(parsed,baseurl): @@ -64,7 +72,7 @@ def __zippyshare(parsed,baseurl):
return [1,'http://' + baseurl + href]
except:
return [0,'']
def __filesfm_folder(parsed):
def __filesfm_folder(parsed, link):
file_array = []
counter = 1
for i in parsed.find_all('div', class_='file'):
@ -89,7 +97,46 @@ def __filesfm_file(parsed): @@ -89,7 +97,46 @@ def __filesfm_file(parsed):
result = [1,str('https://files.fm'+result[1])]
return result
def __mediafire_file(parsed):
def get_json(link, post=None):
    """Request ``link`` and return its body parsed as JSON.

    Args:
        link: URL to fetch.
        post: Optional form fields (a mapping or a sequence of 2-tuples).
            When given, they are URL-encoded and sent as the request body;
            when omitted, ``link`` is fetched without a body.

    Returns:
        The decoded JSON document (the body is read as UTF-8).

    Errors raised by ``urllib`` or by the JSON decoder are not caught here.
    """
    if post is None:
        request = link
    else:
        body = urlencode(post).encode('utf-8')
        request = urllib.request.Request(link, body)
    # The context manager closes the response (and its socket) even when
    # reading or decoding fails, instead of leaving it to garbage collection.
    with urllib.request.urlopen(request) as response:
        return json.loads(response.read().decode('utf-8'))
def __mediafire_crawler(folder_key, folder, link):
    """Walk a Mediafire folder tree and download every file found.

    Args:
        folder_key: Key of the Mediafire folder to list.
        folder: Relative output path for this folder; it is passed to
            ``_download_file`` as ``additional_fold`` and extended with each
            subfolder name plus ``/`` while recursing.
        link: Original folder link, handed through to ``_download_file``.

    Returns:
        A list of ``[filename, direct_url, folder]`` entries, subfolder
        entries first, followed by the files of this folder.
    """
    # Both listings are requested up front, before anything is downloaded.
    subfolder_listing = get_json('https://www.mediafire.com/api/1.4/folder/get_content.php?r=mgsp&content_type=folders&filter=all&order_by=name&order_direction=asc&chunk=1&version=1.5&folder_key='+folder_key+'&response_format=json')
    file_listing = get_json('https://www.mediafire.com/api/1.4/folder/get_content.php?r=plqv&content_type=files&filter=all&order_by=name&order_direction=asc&chunk=1&version=1.5&folder_key='+folder_key+'&response_format=json')
    subfolders = subfolder_listing['response']['folder_content']['folders']
    entries = file_listing['response']['folder_content']['files']

    collected = []
    if subfolders is not None:
        for sub in subfolders:
            # Recurse first so nested files are handled before this folder's own.
            collected.extend(__mediafire_crawler(sub['folderkey'], folder + sub['name'] + '/', link))

    for entry in entries:
        page_url = entry['links']['normal_download']
        # Element 1 of the __mediafire_file result is used as the download URL.
        direct_url = __mediafire_file(url2bs(page_url), page_url)[1]
        collected.append([entry['filename'], direct_url, folder])
        _download_file(link, direct_url, additional_fold=folder)
    return collected
def __mediafire_folder(parsed, link):
    """Download the whole Mediafire folder that ``link`` points to.

    Args:
        parsed: Parsed page for ``link``; unused here, kept so the function
            has the same ``(parsed, link)`` signature as the other folder
            handlers.
        link: Mediafire folder URL containing ``folder/<key>``.

    Returns:
        Always 0. The files are downloaded by ``__mediafire_crawler`` while it
        walks the folder, so no file list is handed back to the caller.
    """
    # Take the path segment right after "folder/" as the key. The previous
    # slice (up to the last "/") yielded an empty key for ".../folder/KEY"
    # and swallowed extra path segments for deeper URLs.
    match = re.search(r'folder/([^/?#]+)', link)
    if match:
        folder_key = match.group(1)
    else:
        # Unrecognised link shape: fall back to the original slicing.
        folder_key = link[link.find('folder')+7:link.rfind('/')]
    _declare_opener([('Referer', link), ('Content-Type', 'application/json')])
    folder_info = get_json(
        'https://www.mediafire.com/api/1.4/folder/get_info.php',
        (('folder_key', folder_key), ('response_format', 'json'), ('recursive', 'yes')),
    )
    root_folder = folder_info['response']['folder_info']['name'] + '/'
    __mediafire_crawler(folder_key, root_folder, link)
    return 0
def __mediafire_file(parsed, link):
if link.find('folder') != -1:
return [2,'']
result = (parsed.find('div',class_='download_link').find('script'))
regex = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
result = re.findall(regex, result.string)
@ -99,30 +146,33 @@ def __mediafire_file(parsed): @@ -99,30 +146,33 @@ def __mediafire_file(parsed):
else:
return [0,'']
def __mediafire_folder(parsed):
pass
def _parse_files(parsed, link, __func_host): #this function is intended for folders only, but it may work for single files as well
file_array = __func_host(parsed)
file_array = __func_host(parsed, link) #['filename','directurl']
input_invalid = True
while input_invalid:
user_input = input('\nSelect your desired files by their indexes, comma separated (ex. 1,3,4): ')
try:
user_input = user_input.replace(' ',"").split(',')
for i in range(len(user_input)):
user_input[i] = int(user_input[i])-1
except:
print('Your input was invalid, try again')
else:
input_invalid = False
for i in user_input:
try:
pull = file_array[i][1]
except:
print('Errored index, continuing...')
continue
_download_file(link,pull)
if args.interactive:
while input_invalid:
user_input = input('\nSelect your desired files by their indexes, comma separated (ex. 1,3,4): ')
try:
user_input = user_input.replace(' ',"").split(',')
for i in range(len(user_input)):
user_input[i] = int(user_input[i])-1
except:
print('Your input was invalid, try again')
else:
input_invalid = False
for i in user_input:
try:
pull = file_array[i][1]
except:
print('Errored index, continuing...')
continue
_download_file(link,pull)
elif result == 0:
return 0
else:
for i in file_array:
_download_file(link, i[1])
def url2bs(link): #simple, feed a link, poop a beautifulsoup object
try:
@ -144,16 +194,16 @@ def parse_link(link): #core function, if you'll ever intend to import it obv. fe @@ -144,16 +194,16 @@ def parse_link(link): #core function, if you'll ever intend to import it obv. fe
except:
raise ValueError
if url.netloc.endswith('files.fm'): result = __filesfm_file(parsed)
if url.netloc.endswith('files.fm'): result = __filesfm_file(parsed); __func_host = __filesm_folder
elif url.netloc.endswith('.zippyshare.com'): result = __zippyshare(parsed, url.netloc)
elif url.netloc.endswith('.mediafire.com'): result = __mediafire_file(parsed)
elif url.netloc.endswith('.mediafire.com'): result = __mediafire_file(parsed, link); __func_host = __mediafire_folder
else: print('wrong url'); return -1
if result[0] == 0: print('url could not be retrieved'); return -1
elif result[0] == 1:
if len(result) == 3: return _download_file(link,result[1], headers)
else: return _download_file(link,result[1])
elif result[0] == 2: return _parse_files(parsed, link, __filesfm_folder)
elif result[0] == 2: return _parse_files(parsed, link, __func_host)
else: print('this should not happen'); return -1
if __name__ == '__main__':

Loading…
Cancel
Save