from sys import version_info
from bs4 import BeautifulSoup
from urllib.parse import urlparse, unquote, urlencode
import shutil, cgi, os, urllib.request, argparse, sys, re, json, gettext
gettext.install('messages', localedir='po')
#global variables
base_folder = 'fmloader/'
version = '0.0.5'
#end global variables
#declarative jumbo
if version_info < (3, 6):
    print(_("you must have Python 3.6 or higher installed\nthe script was written for 3.6+ and does not account for Python 2 at all!"))
    sys.exit()
#end declarative jumbo
def check_folder(folder):
    try:
        os.mkdir(folder)
    except FileExistsError:
        pass #¯\_(ツ)_/¯
def _declare_opener(additional_header=None): #do not touch, it works as is, not that it needs better user-agents >.>
    opener = urllib.request.build_opener()
    headers = [('User-agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0')]
    if additional_header is not None:
        headers += additional_header
    opener.addheaders = headers
    urllib.request.install_opener(opener)
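# _declare_opener usage note: e.g. _declare_opener([('Referer', link)]) installs
# a global opener, so every later urllib.request.urlopen() call sends the spoofed
# User-agent plus whatever extra header pairs were passed in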
def _download_file(link, file_link, additional_header=None, additional_fold=''): #link is the original file page (used as the referrer), file_link is the actual download link
    try:
        if additional_header is not None:
            _declare_opener([('Referer', link), additional_header])
        else:
            _declare_opener([('Referer', link)])
        with urllib.request.urlopen(file_link) as response:
            folder = base_folder + additional_fold
            #create every intermediate directory of the target path, one level at a time
            Buffy = folder.split('/')
            buf = ''
            for i in range(len(Buffy) - 1):
                buf += Buffy[i] + '/'
                check_folder(buf)
            try:
                #prefer the server-supplied filename from the Content-Disposition header
                _, params = cgi.parse_header(response.headers.get('Content-Disposition', ''))
                print('Downloading file ' + params['filename'] + '...')
                out_file = open(folder + params['filename'], 'wb')
            except KeyError:
                #no filename in the header; fall back to the last path segment of the URL
                filename = unquote(os.path.basename(urlparse(file_link).path))
                print('Downloading file ' + filename + '...')
                out_file = open(folder + filename, 'wb')
            shutil.copyfileobj(response, out_file)
    except ValueError:
        print(_('Something\'s wrong with the link download routine'))
        return -1
    else:
        print(_('Done!'))
        return 0
def __zippyshare(parsed, baseurl):
    #zippyshare hides the download path behind an inline script that assembles it
    #from a small arithmetic expression; redo that computation here
    try:
        result = parsed.find('div', class_='right').find('script').string.split(' ')[6:17]
        for i in range(len(result)):
            result[i] = result[i].replace('(','').replace(')','').replace('\"','').replace(';','').replace('\n','')
        href = result[0] + str((int(result[2]) % int(result[4])) + (int(result[6]) % int(result[8]))) + result[10]
        return [1, 'http://' + baseurl + href]
    except (AttributeError, IndexError, ValueError):
        #page layout changed or the script tokens were not where we expected
        return [0, '']
def __filesfm_folder(parsed, link):
    #collect [title parts, download path] pairs and print a numbered listing
    #so the interactive prompt in _parse_files has indexes to offer
    file_array = []
    counter = 1
    for i in parsed.find_all('div', class_='file'):
        file_array.append([i.get('title').strip().split('<br/>'),
                           i.find('div', class_='item_name').get('onclick').replace('\'', "")[16:]])
        print(str(counter) + ' - ' + file_array[-1][0][0] + ' -- ' + file_array[-1][0][1][:-1])
        counter += 1
    return file_array
def __filesfm_file(parsed):
    result = [0, ''] #[status, url]; status 0 = failed, 1 = direct link found, 2 = treat as folder
    try:
        result[1] = parsed.find('div', class_='drop-down-options').find('a').get('href')
    except AttributeError:
        #print(_("Required div element not found or incorrect site"))
        return result
    if result[1].startswith('//'):
        if args.interactive:
            result[0] = 2
        else:
            result = [1, str('https://' + result[1][2:])]
    elif result[1].startswith('/d'):
        result = [1, str('https://files.fm' + result[1])]
    return result
def get_json(link, post=None):
    #GET by default; pass a sequence of (key, value) pairs in post to send a form-encoded POST
    if post is not None:
        data = bytearray(urlencode(post), 'utf-8')
        req = urllib.request.Request(link, data)
        request = urllib.request.urlopen(req)
    else:
        request = urllib.request.urlopen(link)
    return json.loads(request.read().decode('utf-8'))
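# get_json usage sketch, mirroring the call in __mediafire_folder below
# ('key' here is a placeholder for a real mediafire folder key):
#   info = get_json('https://www.mediafire.com/api/1.4/folder/get_info.php',
#                   (('folder_key', key), ('response_format', 'json')))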
def __mediafire_crawler(folder_key, folder, link):
    #ask the mediafire folder API for subfolders and files separately, then recurse
    folder_folders = get_json('https://www.mediafire.com/api/1.4/folder/get_content.php?r=mgsp&content_type=folders&filter=all&order_by=name&order_direction=asc&chunk=1&version=1.5&folder_key='+folder_key+'&response_format=json')
    folder_files = get_json('https://www.mediafire.com/api/1.4/folder/get_content.php?r=plqv&content_type=files&filter=all&order_by=name&order_direction=asc&chunk=1&version=1.5&folder_key='+folder_key+'&response_format=json')
    folders = folder_folders['response']['folder_content']['folders']
    files = folder_files['response']['folder_content']['files']
    buf = []
    if folders is not None:
        for i in folders:
            buf += __mediafire_crawler(i['folderkey'], folder + i['name'] + '/', link)
    for i in files:
        file = __mediafire_file(url2bs(i['links']['normal_download']), i['links']['normal_download'])[1]
        buf.append([i['filename'], file, folder])
        _download_file(link, file, additional_fold=folder)
    return buf
def __mediafire_folder(parsed, link):
    #the folder key sits between 'folder/' and the trailing '/name' in the URL
    folder_key = link[link.find('folder') + 7:link.rfind('/')]
    _declare_opener([('Referer', link), ('Content-Type', 'application/json')])
    folder_info = get_json('https://www.mediafire.com/api/1.4/folder/get_info.php', (('folder_key', folder_key), ('response_format', 'json'), ('recursive', 'yes')))
    __mediafire_crawler(folder_key, folder_info['response']['folder_info']['name'] + '/', link)
    return 0
def __mediafire_file(parsed, link):
    if link.find('folder') != -1:
        return [2, ''] #folder link: hand off to the folder routine instead
    result = parsed.find('div', class_='download_link').find('script')
    #pull the first absolute URL out of the inline download script
    regex = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    result = re.findall(regex, result.string)
    if len(result) == 1:
        return [1, result[0]]
    else:
        return [0, '']
def _parse_files(parsed, link, __func_host): #this function is intended for folders only, but it may work for single files as well
    file_array = __func_host(parsed, link) #list of [filename, directurl] entries
    if file_array == 0: #the host handler already downloaded everything itself
        return 0
    if args.interactive:
        input_invalid = True
        while input_invalid:
            user_input = input('\n' + _('Select your desired files by their indexes, comma separated (ex. 1,3,4): '))
            try:
                user_input = user_input.replace(' ', "").split(',')
                for i in range(len(user_input)):
                    user_input[i] = int(user_input[i]) - 1
            except ValueError:
                print(_('Your input was invalid, try again'))
            else:
                input_invalid = False
        for i in user_input:
            try:
                pull = file_array[i][1]
            except IndexError:
                print(_('Errored index, continuing...'))
                continue
            _download_file(link, pull)
    else:
        for i in file_array:
            _download_file(link, i[1])
def url2bs(link): #simple, feed a link, poop a beautifulsoup object
    try:
        parsed = urllib.request.urlopen(link)
        parsed_bs = BeautifulSoup(parsed.read().decode('utf-8'), 'html.parser')
    except ValueError:
        print(_("Incorrect URL typed in, please copy and paste the link from the browser bar if possible! (recommended formatting: https://files.fm/u/{FILEID} )"))
        return -1
    else:
        return parsed_bs
def parse_link(link): #core function; if you ever intend to import this, feed it a supported filehost link
    check_folder(base_folder)
    _declare_opener()
    parsed = url2bs(link)
    if parsed == -1: #the URL could not even be opened
        return -1
    try:
        url = urlparse(link)
    except ValueError:
        raise
    if url.netloc.endswith('files.fm'):
        result = __filesfm_file(parsed)
        __func_host = __filesfm_folder
    elif url.netloc.endswith('.zippyshare.com'):
        result = __zippyshare(parsed, url.netloc)
    elif url.netloc.endswith('.mediafire.com'):
        result = __mediafire_file(parsed, link)
        __func_host = __mediafire_folder
    else:
        print(_('wrong url'))
        return -1
    if result[0] == 0:
        print(_('url could not be retrieved'))
        return -1
    elif result[0] == 1:
        if len(result) == 3: #a third element carries an extra header pair
            return _download_file(link, result[1], result[2])
        else:
            return _download_file(link, result[1])
    elif result[0] == 2:
        return _parse_files(parsed, link, __func_host)
    else:
        print(_('this should not happen'))
        return -1
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=_('FilesFM python file&folder downloader'))
    parser.add_argument('--interactive', '-i', action='store_true', help=_('Enables prompts during the download\nEnable this to activate partial folder downloads'))
    parser.add_argument('urls', help=_('links to filehosts'), nargs='*', type=str)
    args = parser.parse_args()
    if args.urls:
        for i in args.urls:
            parse_link(i)
    else:
        parser.print_help()
        sys.exit(1)
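# Example invocations, assuming the script is saved as fmloader.py
# (the files.fm URL shape follows the hint printed by url2bs above):
#   python3 fmloader.py https://files.fm/u/FILEID
#   python3 fmloader.py -i https://files.fm/u/FILEID
# Downloaded files land under the fmloader/ directory.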