123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218 |
- from sys import version_info
- from bs4 import BeautifulSoup
- from urllib.parse import urlparse, unquote, urlencode
- import shutil, cgi, os, urllib.request, argparse, sys, re, json, gettext
- gettext.install('messages', localedir='po')
-
# global configuration
base_folder = 'fmloader/'  # every download is stored under this directory
version = '0.0.5'
# end global configuration

# Runtime guard: the script relies on Python 3.6+ behaviour.
# An explicit comparison is used instead of `assert`, because asserts are
# stripped when the interpreter runs with `-O`, which would silently skip
# this check.
if version_info < (3, 6):
    print(_("you must have python 3.6 or higher installed\nthe script was intended to run on 3.6 or higher and won't account for python2 at all as well!"))
    sys.exit()
-
def check_folder(folder):
    """Create *folder* if it does not already exist.

    Uses os.makedirs with exist_ok=True, which also creates any missing
    parent directories and avoids the race between an existence check and
    the creation call.
    """
    os.makedirs(folder, exist_ok=True)
-
def _declare_opener(additional_header=None):
    """Install a global urllib opener with a browser-like User-Agent.

    additional_header: optional list of (name, value) header tuples that
    are appended after the User-Agent header.
    """
    user_agent = ('User-agent',
                  'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0')
    headers = [user_agent]
    if additional_header is not None:
        headers.extend(additional_header)
    opener = urllib.request.build_opener()
    opener.addheaders = headers
    urllib.request.install_opener(opener)
-
def _download_file(link, file_link, additional_header=None, additional_fold=''):
    """Download *file_link* into base_folder + additional_fold.

    link              -- the page the file came from; sent as the Referer.
    file_link         -- the actual direct download url.
    additional_header -- optional single (name, value) header tuple.
    additional_fold   -- relative sub-folder (may contain several levels).

    Returns 0 on success, -1 when the url is malformed (ValueError from
    urllib).  Fixes over the original: the `except ValueError` branch
    previously re-raised before its print/return (unreachable code), and
    the output file handle was never closed.
    """
    try:
        headers = [('Referer', link)]
        if additional_header is not None:
            headers.append(additional_header)
        _declare_opener(headers)

        folder = base_folder + additional_fold
        os.makedirs(folder, exist_ok=True)  # create every missing path level

        with urllib.request.urlopen(file_link) as response:
            # Prefer the server-advertised filename (Content-Disposition);
            # fall back to the last path component of the url.
            # NOTE(review): cgi.parse_header is deprecated in recent Pythons;
            # kept because the file already imports cgi.
            _, params = cgi.parse_header(response.headers.get('Content-Disposition', ''))
            filename = params.get('filename')
            if filename is None:
                filename = unquote(os.path.basename(urlparse(file_link).path))
            print('Downloading file ' + filename + '...')
            with open(folder + filename, 'wb') as out_file:
                shutil.copyfileobj(response, out_file)
    except ValueError:
        # Mirror url2bs(): report a bad url instead of propagating.
        print(_('Something\'s wrong with the link download routine'))
        return -1
    else:
        print(_('Done!'))
        return 0
-
def __zippyshare(parsed, baseurl):
    """Extract the real download link from a zippyshare page.

    The page script computes the path as prefix + (a % b + c % d) + suffix;
    this replays that arithmetic.  Returns [1, url] on success and [0, '']
    on any parse failure (best-effort, hence the blanket except).
    """
    try:
        script_text = parsed.find('div', class_='right').find('script').string
        tokens = script_text.split(' ')[6:17]
        cleaned = [
            t.replace('(', '').replace(')', '').replace('"', '')
             .replace(';', '').replace('\n', '')
            for t in tokens
        ]
        number = (int(cleaned[2]) % int(cleaned[4])) + (int(cleaned[6]) % int(cleaned[8]))
        href = cleaned[0] + str(number) + cleaned[10]
        return [1, 'http://' + baseurl + href]
    except:
        return [0, '']
def __filesfm_folder(parsed, link):
    """List the files of a files.fm folder page, printing an indexed menu.

    Returns a list of [[name, size_text], download_path] entries in page
    order; the printed index is 1-based for the interactive prompt.
    """
    file_array = []
    for index, node in enumerate(parsed.find_all('div', class_='file'), start=1):
        title_parts = node.get('title').strip().split('<br/>')
        download_path = node.find('div', class_='item_name').get('onclick').replace("'", '')[16:]
        file_array.append([title_parts, download_path])
        print(str(index) + ' - ' + title_parts[0] + ' -- ' + title_parts[1][:-1])
    return file_array
-
def __filesfm_file(parsed):
    """Resolve the direct download link on a files.fm single-file page.

    Returns [status, url]: 0 = link not found / wrong page layout,
    1 = usable direct url, 2 = protocol-relative link while running
    interactively (the caller then treats the page as a folder).
    """
    result = [0, '']
    try:
        href = parsed.find('div', class_='drop-down-options').find('a').get('href')
    except:
        # Required div element not found or incorrect site.
        return result
    result[1] = href
    if href.startswith('//'):
        if args.interactive:
            result[0] = 2
        else:
            result = [1, str('https://' + href[2:])]
    elif href.startswith('/d'):
        result = [1, str('https://files.fm' + href)]
    return result
-
def get_json(link, post=None):
    """Fetch *link* and decode its body as JSON.

    post -- optional mapping or sequence of pairs; when given, it is
            urlencoded and sent as a POST body.

    Returns the decoded object.  Fix over the original: the response is
    closed via a context manager instead of leaking the socket.
    """
    if post is not None:
        payload = urlencode(post).encode('utf-8')
        target = urllib.request.Request(link, payload)
    else:
        target = link
    with urllib.request.urlopen(target) as response:
        return json.loads(response.read().decode('utf-8'))
-
def __mediafire_crawler(folder_key, folder, link):
    """Recursively walk a mediafire folder, downloading every file.

    folder is the path (relative to base_folder) downloads are stored
    under.  Returns a flat list of [filename, direct_url, folder] for
    everything that was crawled.
    """
    api = ('https://www.mediafire.com/api/1.4/folder/get_content.php?r={}'
           '&content_type={}&filter=all&order_by=name&order_direction=asc'
           '&chunk=1&version=1.5&folder_key={}&response_format=json')
    folders = get_json(api.format('mgsp', 'folders', folder_key))['response']['folder_content']['folders']
    files = get_json(api.format('plqv', 'files', folder_key))['response']['folder_content']['files']

    collected = []
    # The API reports `folders: null` for a leaf folder, hence the guard.
    if folders is not None:
        for sub in folders:
            collected += __mediafire_crawler(sub['folderkey'], folder + sub['name'] + '/', link)
    for entry in files:
        page_link = entry['links']['normal_download']
        direct = __mediafire_file(url2bs(page_link), page_link)[1]
        collected.append([entry['filename'], direct, folder])
        _download_file(link, direct, additional_fold=folder)
    return collected
-
def __mediafire_folder(parsed, link):
    """Download a whole mediafire folder given its page *link*.

    Extracts the folder key from the url, asks the folder/get_info API
    for the folder's name, then crawls it recursively.  Always returns 0.
    """
    key = link[link.find('folder') + 7:link.rfind('/')]
    _declare_opener([('Referer', link), ('Content-Type', 'application/json')])
    info = get_json(
        'https://www.mediafire.com/api/1.4/folder/get_info.php',
        (('folder_key', key), ('response_format', 'json'), ('recursive', 'yes')),
    )
    root_name = info['response']['folder_info']['name'] + '/'
    __mediafire_crawler(key, root_name, link)
    return 0
-
def __mediafire_file(parsed, link):
    """Pull the direct download url out of a mediafire file page.

    Returns [2, ''] when *link* is actually a folder url, [1, url] when
    exactly one http(s) url is found in the download script, and [0, '']
    otherwise.
    """
    if 'folder' in link:
        return [2, '']

    script_node = parsed.find('div', class_='download_link').find('script')
    url_pattern = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    matches = re.findall(url_pattern, script_node.string)

    if len(matches) == 1:
        return [1, matches[0]]
    return [0, '']
-
def _parse_files(parsed, link, __func_host):
    """Download the files listed by a host-specific folder parser.

    __func_host(parsed, link) returns a list of [metadata, direct_url]
    pairs (printing an indexed menu as a side effect), or 0 on failure.
    In interactive mode the user picks 1-based indexes; otherwise every
    file is downloaded.  Returns 0 when the parser produced nothing.

    Fix over the original: the `file_array == 0` guard used to come
    after the interactive branch, so a failed parse crashed the prompt
    loop; it is now checked first.
    """
    file_array = __func_host(parsed, link)
    if file_array == 0:
        return 0

    if args.interactive:
        while True:
            user_input = input('\n' + _('Select your desired files by their indexes, comma separated (ex. 1,3,4): '))
            try:
                # "1,3,4" -> zero-based indexes [0, 2, 3]
                indexes = [int(tok) - 1 for tok in user_input.replace(' ', '').split(',')]
            except ValueError:
                print('Your input was invalid, try again')
            else:
                break

        for i in indexes:
            try:
                pull = file_array[i][1]
            except IndexError:
                print(_('Errored index, continuing...'))
                continue
            _download_file(link, pull)
    else:
        for entry in file_array:
            _download_file(link, entry[1])
-
def url2bs(link):
    """Fetch *link* and return it parsed as a BeautifulSoup object.

    Returns -1 (after printing a hint) when the url is malformed enough
    for urllib to raise ValueError.
    """
    try:
        response = urllib.request.urlopen(link)
        soup = BeautifulSoup(response.read().decode('utf-8'), 'html.parser')
    except ValueError:
        print(_("Incorrect URL typed in, please copy and paste the link from the browser bar if possible! (recommended formatting: https://files.fm/u/{FILEID} )"))
        return -1
    return soup
-
def parse_link(link):
    """Core entry point: download the content behind a supported url.

    Supported hosts: files.fm, zippyshare, mediafire.  Returns 0 on
    success and -1 on any recognised failure.

    Fixes over the original: the files.fm folder handler was assigned
    from a misspelled name (`__filesm_folder` -> NameError), the
    3-element result branch referenced an undefined `headers`, and a
    status-2 result from a host with no folder handler (zippyshare)
    would hit an unbound `__func_host`.
    """
    check_folder(base_folder)
    _declare_opener()
    parsed = url2bs(link)

    try:
        url = urlparse(link)
    except:
        raise ValueError

    __func_host = None  # set only for hosts that support folder listings
    if url.netloc.endswith('files.fm'):
        result = __filesfm_file(parsed)
        __func_host = __filesfm_folder  # fixed typo: was __filesm_folder
    elif url.netloc.endswith('.zippyshare.com'):
        result = __zippyshare(parsed, url.netloc)
    elif url.netloc.endswith('.mediafire.com'):
        result = __mediafire_file(parsed, link)
        __func_host = __mediafire_folder
    else:
        print('wrong url')
        return -1

    if result[0] == 0:
        print('url could not be retrieved')
        return -1
    elif result[0] == 1:
        # A 3-element result carries an extra header tuple for the download
        # (fixed: previously referenced an undefined name `headers`).
        if len(result) == 3:
            return _download_file(link, result[1], result[2])
        return _download_file(link, result[1])
    elif result[0] == 2:
        if __func_host is None:
            print(_('this should not happen'))
            return -1
        return _parse_files(parsed, link, __func_host)
    else:
        print(_('this should not happen'))
        return -1
-
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=_('FilesFM python file&folder downloader'))
    parser.add_argument('--interactive', '-i', action='store_true',
                        help=_('Enables prompts done during the download\nEnable this to activate partial folder downloads'))
    parser.add_argument('urls', help=_('links to filehosts'), nargs='*', type=str)
    args = parser.parse_args()
    # Check the parsed url list rather than raw argv length: invoking the
    # script with only `-i` previously passed the argv test and silently
    # did nothing; now it prints the help and exits non-zero.
    if args.urls:
        for url in args.urls:
            parse_link(url)
    else:
        parser.print_help()
        sys.exit(1)
|