from sys import version_info
from bs4 import BeautifulSoup
from urllib.parse import urlparse, unquote, urlencode
import shutil, cgi, os, urllib.request, argparse, sys, re, json, gettext
gettext.install('messages', localedir='po')

#global variables
base_folder = 'fmloader/'
version = '0.0.5'
#end global variables

#declarative jumbo
try:
    assert version_info >= (3,6)
except AssertionError:
    print(_("you must have python 3.6 or higher installed\nthe script was written for 3.6+ and does not support python 2 at all!"))
    sys.exit()
#end declarative jumbo

def check_folder(folder):
    try:
        os.mkdir(folder)
    except FileExistsError:
        pass #¯\_(ツ)_/¯

def _declare_opener(additional_header=None): #do not touch, it works as is, not that it needs better user-agents >.>
    opener = urllib.request.build_opener()
    headers = [('User-agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0')]
    if additional_header is not None:
        headers += additional_header
    opener.addheaders = headers
    urllib.request.install_opener(opener)

def _download_file(link, file_link, additional_header=None, additional_fold=''): #original file link for the referrer, file_link is actual download link
    try:
        if additional_header is not None:
            _declare_opener([('Referer', link), additional_header])
        else:
            _declare_opener([('Referer', link),])
        with urllib.request.urlopen(file_link) as response:
            folder = base_folder + additional_fold
            os.makedirs(folder, exist_ok=True) #creates every missing intermediate folder in one call
            try:
                #note: the cgi module is deprecated since python 3.11 and removed in 3.13
                _, params = cgi.parse_header(response.headers.get('Content-Disposition', ''))
                filename = params['filename']
            except KeyError:
                #no filename in Content-Disposition, fall back to the last path segment of the url
                filename = unquote(os.path.basename(urlparse(file_link).path))
            print(_('Downloading file ') + filename + '...')
            with open(folder + filename, 'wb') as out_file:
                shutil.copyfileobj(response, out_file)
    except ValueError:
        print(_("Something's wrong with the link download routine"))
        return -1
    else:
        print(_('Done!'))
        return 0

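#for reference, the filename above is normally pulled from a header like
#(hypothetical value): Content-Disposition: attachment; filename="archive.zip"
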
def __zippyshare(parsed,baseurl):
    try:
        result = parsed.find('div', class_='right').find('script').string.split(' ')[6:17]

        for i in range(len(result)): result[i] = result[i].replace('(','').replace(')','').replace('\"','').replace(';','').replace('\n','')

        href = result[0] + str( (int(result[2]) % int(result[4])) + (int(result[6]) % int(result[8])) ) + result[10]
        return [1,'http://' + baseurl + href]
    except Exception: #any parse failure means the page layout changed or the link is dead
        return [0,'']
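
#for context, __zippyshare above expects the page's script to look roughly like this
#(all values hypothetical, reconstructed from the parsing logic):
#  document.getElementById('dlbutton').href = "/d/AbC123/" + (264908 % 51245 + 913 % 53) + "/file.zip";
#the token split recovers the path prefix, the four operands and the suffix, then redoes the arithmetic
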
def __filesfm_folder(parsed, link):
    file_array = []
    counter = 1
    for i in parsed.find_all('div', class_='file'):
        file_array.append([i.get('title').strip().split('<br/>'), i.find('div', class_='item_name').get('onclick').replace('\'',"")[16:]])
        print(str(counter)+' - '+file_array[-1][0][0]+' -- '+file_array[-1][0][1][:-1])
        counter = counter+1
    return file_array

def __filesfm_file(parsed): #note: reads the module-level args parsed in __main__
    result = [0,'']
    try:
        result[1] = (parsed.find('div', class_='drop-down-options').find('a').get('href'))
    except AttributeError: #required div element not found or incorrect site
        return result
    if result[1].startswith('//'):
        if args.interactive:
            result[0] = 2
        else:
            result = [1,str('https://'+result[1][2:])]
    elif result[1].startswith('/d'):
        result = [1,str('https://files.fm'+result[1])]
    return result

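#note on conventions: the single-file handlers (__zippyshare, __filesfm_file, __mediafire_file)
#all return [status, url] pairs: 0 - the page could not be parsed, 1 - url is a direct download
#link, 2 - the link is a folder (or needs a prompt) and must go through _parse_files
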
def get_json(link, post=None): #fetch a url (optionally with form-encoded POST data) and decode the json reply
    if post is not None:
        data = urlencode(post)
        data = bytearray(data, 'utf-8')

        req = urllib.request.Request(link,data)
        request = urllib.request.urlopen(req)
    else:
        request = urllib.request.urlopen(link)
    return json.loads(request.read().decode('utf-8'))

def __mediafire_crawler(folder_key, folder, link): #recursively walks a mediafire folder tree, downloading files as it goes
    folder_folders = get_json('https://www.mediafire.com/api/1.4/folder/get_content.php?r=mgsp&content_type=folders&filter=all&order_by=name&order_direction=asc&chunk=1&version=1.5&folder_key='+folder_key+'&response_format=json')
    folder_files = get_json('https://www.mediafire.com/api/1.4/folder/get_content.php?r=plqv&content_type=files&filter=all&order_by=name&order_direction=asc&chunk=1&version=1.5&folder_key='+folder_key+'&response_format=json')

    folders = folder_folders['response']['folder_content']['folders']
    files = folder_files['response']['folder_content']['files']

    buf = []
    if folders is not None:
        for i in folders:
            buf += __mediafire_crawler(i['folderkey'],folder+i['name']+'/', link)
    if files is not None: #guard against empty folders, mirroring the folders check above
        for i in files:
            file = __mediafire_file(url2bs(i['links']['normal_download']),i['links']['normal_download'])[1]
            buf.append([i['filename'],file,folder])
            _download_file(link, file, additional_fold=folder)
    return buf

def __mediafire_folder(parsed, link):
    folder_key = link[link.find('folder')+7:link.rfind('/')]

    _declare_opener([('Referer', link),('Content-Type','application/json')])
    folder_info = get_json('https://www.mediafire.com/api/1.4/folder/get_info.php',(('folder_key', folder_key), ('response_format', 'json'), ('recursive','yes')))
    __mediafire_crawler(folder_key, folder_info['response']['folder_info']['name']+'/',link)
    return 0

def __mediafire_file(parsed, link):
    if link.find('folder') != -1:
        return [2,'']

    try:
        result = (parsed.find('div',class_='download_link').find('script'))
        regex = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
        result = re.findall(regex, result.string)
    except AttributeError: #the download_link div or its script was missing
        return [0,'']

    if len(result) == 1:
        return [1,result[0]]
    else:
        return [0,'']

def _parse_files(parsed, link, __func_host): #this function is intended for folders only, but it may work for single files as well
    file_array = __func_host(parsed, link) #['filename','directurl']
    if file_array == 0: #the host handler already downloaded everything itself
        return 0
    input_invalid = True
    if args.interactive:
        while input_invalid:
            user_input = input('\n' + _('Select your desired files by their indexes, comma separated (ex. 1,3,4): '))
            try:
                user_input = user_input.replace(' ',"").split(',')
                for i in range(len(user_input)):
                    user_input[i] = int(user_input[i])-1
            except ValueError:
                print(_('Your input was invalid, try again'))
            else:
                input_invalid = False

        for i in user_input:
            try:
                pull = file_array[i][1]
            except IndexError:
                print(_('Errored index, continuing...'))
                continue
            _download_file(link,pull)
    else:
        for i in file_array:
            _download_file(link, i[1])

def url2bs(link): #simple, feed a link, poop a beautifulsoup object
    try:
        parsed = urllib.request.urlopen(link)
        parsed_bs = BeautifulSoup(parsed.read().decode('utf-8'), 'html.parser')
    except ValueError:
        print(_("Incorrect URL typed in, please copy and paste the link from the browser bar if possible! (recommended formatting: https://files.fm/u/{FILEID} )"))
        return -1
    else:
        return parsed_bs

def parse_link(link): #core function, if you'll ever intend to import it obv. feed it with a files.fm link
    check_folder(base_folder)
    _declare_opener()
    parsed = url2bs(link)

    try:
        url = urlparse(link)
    except Exception:
        raise ValueError

    if url.netloc.endswith('files.fm'): result = __filesfm_file(parsed); __func_host = __filesfm_folder
    elif url.netloc.endswith('.zippyshare.com'): result = __zippyshare(parsed, url.netloc) #never returns status 2, so no __func_host needed
    elif url.netloc.endswith('.mediafire.com'): result = __mediafire_file(parsed, link); __func_host = __mediafire_folder
    else: print(_('wrong url')); return -1

    if result[0] == 0: print(_('url could not be retrieved')); return -1
    elif result[0] == 1:
        if len(result) == 3: return _download_file(link, result[1], result[2]) #result[2] would be an extra header; no current handler returns one
        else: return _download_file(link, result[1])
    elif result[0] == 2: return _parse_files(parsed, link, __func_host)
    else: print(_('this should not happen')); return -1

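#a minimal import-usage sketch (the module name 'fmloader' and the file id are made up):
#  import fmloader, argparse
#  fmloader.args = argparse.Namespace(interactive=False) #the handlers read the module-level args
#  fmloader.parse_link('https://files.fm/u/abc123')
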
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=_('FilesFM python file&folder downloader'))
    parser.add_argument('--interactive','-i', action='store_true', help=_('Enables prompts done during the download\nEnable this to activate partial folder downloads'))
    parser.add_argument('urls', help=_('links to filehosts'), nargs='*', type=str)
    args = parser.parse_args()
    if args.urls: #checking the parsed urls directly also catches a bare '-i' with no links
        for i in args.urls:
            parse_link(i)
    else:
        parser.print_help()
        sys.exit(1)