Browse Source

Added query based downloading

master
parent
commit
b619c4322e
1 changed files with 207 additions and 115 deletions
  1. 207
    115
      pluralsight.py

+ 207
- 115
pluralsight.py View File

@@ -1,11 +1,15 @@
import json
import os
import random
import re
import secrets
import string
import time
from pathlib import Path
from sys import platform
from typing import Dict, List, Optional, Tuple

import click
import requests
import youtube_dl
from bs4 import BeautifulSoup
@@ -23,53 +27,58 @@ MAX_COURSE_DOWNLOAD_COUNT = 5
# Duration, in seconds, of each time.sleep() pause
SLEEP_DURATION = 5

# Check if current OS/platform is Windows
IS_WINDOWS = platform.startswith("win")

# Master Directory Path
MASTER_DIRECTORY = os.path.join(os.path.expanduser("~/Desktop"), "Pluralsight")
WORKING_DIRECTORY = os.path.dirname(os.path.realpath(__file__))

# Path of the text file where pluralsight account details will be stored
ACCOUNT_FILE_PATH = os.path.join(MASTER_DIRECTORY, "ps.txt")

# Path of the text file where pluralsight courses to be downloaded will be stored
COURSE_LINKS_FILE_PATH = os.path.join(MASTER_DIRECTORY, "c.txt")
CREDENTIAL_FILE_PATH = os.path.join(WORKING_DIRECTORY, "credential.txt")

# Path of the directory where downloaded courses will be saved
SAVE_DIRECTORY_PATH = os.path.join(MASTER_DIRECTORY, "Courses")
SAVE_DIRECTORY_PATH = os.path.join(WORKING_DIRECTORY, "Courses")

# Path of the archive text file used by Youtube-dl to keep track of downloaded videos
ARCHIVE_FILE_PATH = os.path.join(MASTER_DIRECTORY, "archive.txt")
# Location (URL or local file path) of the JSON dump of Pluralsight paths and their courses
# JSON_FILE_URL = os.path.join(WORKING_DIRECTORY, "pluralsight.json")
JSON_FILE_URL = "https://git.teknik.io/CanWePlsRapeTheShitOuttaPluralsight/RapePluralsight/raw/branch/master/pluralsight.json"

# Options for youtube-dl. For a complete list of options, check https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312
ydl_options = {
'writesubtitles': True,
'nooverwrites': True,
'download_archive': ARCHIVE_FILE_PATH,
'sleep_interval': 20,
'max_sleep_interval': 40,
# 'outtmpl': f"{SAVE_DIRECTORY_PATH}/%(playlist)s/%(chapter_number)s - %(chapter)s/%(playlist_index)s - %(title)s.%(ext)s"
# Windows Users should comment out the previous outtmpl and uncomment the following
# 'outtmpl': f"{save_directory_path}\\%(playlist)s\\%(chapter_number)s - %(chapter)s\\%(playlist_index)s - %(title)s.%(ext)s"
'sleep_interval': 10,
'max_sleep_interval': 20,
}

if platform.startswith("win"):
ydl_options[
'outtmpl'] = f"{SAVE_DIRECTORY_PATH}\\%(playlist)s\\%(chapter_number)s - %(chapter)s\\%(playlist_index)s - %(title)s.%(ext)s"
else:
ydl_options[
'outtmpl'] = f"{SAVE_DIRECTORY_PATH}/%(playlist)s/%(chapter_number)s - %(chapter)s/%(playlist_index)s - %(title)s.%(ext)s"
# endregion


class PluralsightPath:
    """
    One path entry of the Pluralsight JSON dump, together with the links of its courses
    """

    def __init__(self, id: int, link: str, title: str, slug: str, course_links: List[str]):
        # Parameter names mirror the keys of the JSON records, which are
        # expanded straight into this constructor as keyword arguments.
        self.id, self.link = id, link
        self.title, self.slug = title, slug
        self.course_links = course_links

# endregion

class TempGmail:
class UserSelection:
    """
    Holds what the user picked at the interactive prompts: the selected paths,
    an optional single course link, and whether the user asked to exit
    """

    def __init__(self, selected_paths: List[PluralsightPath], selected_course_link: str = "", is_exit: bool = False):
        self.is_exit = is_exit
        # An empty link means no single course was picked
        self.selected_course_link = selected_course_link
        self.selected_paths = selected_paths


class DisposableMail:
"""
This class is used to generate random disposable gmails from https://freetempemails.com
and use them for registration purpose
This class is used to generate random disposable emails
"""

def __init__(self, email_address: str):
self.email_address = email_address
def __init__(self):
self.email_address = requests.post("https://gmailnator.com/index/indexquery", {'action': 'GenerateEmail'}).text

def get_email_id(self) -> object:
def get_mail_item_id(self) -> object:
post_url = "https://gmailnator.com/mailbox/mailboxquery"
post_data = {
'action': 'LoadMailList',
@@ -87,15 +96,14 @@ class TempGmail:

return mail_id


except Exception as e:
except:
pass

def get_verification_link(self) -> str:
post_url = "https://gmailnator.com/mailbox/get_single_message/"
post_data = {
'action': 'LoadMailList',
'message_id': self.get_email_id(),
'message_id': self.get_mail_item_id(),
'email': self.email_address.split("+")[0]
}

@@ -109,7 +117,7 @@ class TempGmail:

class Pluralsight:
"""
This class handles the registration, verification and bootstrapping of new Pluralsight accounts
This class handles the registration and verification of new Pluralsight accounts
"""

def __init__(self, email: str, password: str, is_headless: bool = True):
@@ -130,7 +138,7 @@ class Pluralsight:
self.driver.quit()

@staticmethod
def get_name() -> str:
def get_name() -> Tuple[str, str]:
"""
Generate a random string to be used as first or last name

@@ -138,9 +146,12 @@ class Pluralsight:
str: Generated string
"""

letters = string.ascii_lowercase
request_url = "http://names.drycodes.com/1?nameOptions=boy_names" if random.choice([True, False]) \
else "http://names.drycodes.com/1?nameOptions=girl_names"

first_last_name = requests.get(request_url).text.strip('"[]').split('_')

return ''.join(random.choice(letters) for _ in range(random.randint(5, 15)))
return first_last_name

def register(self) -> None:
"""
@@ -165,9 +176,11 @@ class Pluralsight:
lastname_input_element = self.driver.find_element_by_name("lastname")
tos_checkbox_element = self.driver.find_element_by_name("optInBox")

firstName, lastName = self.get_name()

email_input_element.send_keys(self.email)
firstname_input_element.send_keys(self.get_name())
lastname_input_element.send_keys(self.get_name())
firstname_input_element.send_keys(firstName)
lastname_input_element.send_keys(lastName)
tos_checkbox_element.click()

time.sleep(SLEEP_DURATION)
@@ -181,9 +194,12 @@ class Pluralsight:
cancel_button_element = self.driver.find_element_by_class_name("cancelButton---CKAut")
cancel_button_element.click()

time.sleep(SLEEP_DURATION)

def set_password(self, verification_link: str) -> None:
"""
Sets password in the given verification link

Args:
verification_link: The verification link (as string) to set up password
"""
@@ -204,83 +220,57 @@ class Pluralsight:

time.sleep(SLEEP_DURATION)

def bootstrap(self):
"""
Bootstraps newly registered accounts to prevent 403 errors in youtube-dl
"""

username_input_element = self.driver.find_element_by_id("Username")
password_input_element = self.driver.find_element_by_id("Password")
login_button_element = self.driver.find_element_by_id("login")

username_input_element.send_keys(self.email)
password_input_element.send_keys(self.password)
class Progress:
    """
    Small record holding an id, an "all" flag and a course link

    NOTE(review): nothing in the visible code uses this class yet - confirm its intended role.
    """

    def __init__(self, id: int, is_all: bool):
        # The course link always starts out empty; it is not a constructor argument
        self.course_link = ""
        self.id, self.is_all = id, is_all

time.sleep(1)

login_button_element.click()

time.sleep(SLEEP_DURATION)

cancel_button_element = self.driver.find_element_by_class_name("cancelButton---CKAut")
cancel_button_element.click()


def get_password(min_length: int = 25, max_length: int = 50) -> str:
    """
    Generates a random password using ascii letters and numerical digits

    Args:
        min_length: Minimum length of the password, default is 25
        max_length: Maximum length of the password, default is 50

    Returns: Generated password as string

    Raises:
        ValueError: If min_length is negative or greater than max_length
    """

    if min_length < 0 or max_length < min_length:
        raise ValueError(f"Invalid password length bounds: min_length={min_length}, max_length={max_length}")

    # Draw the length from the same cryptographic source as the characters,
    # so no part of the password depends on the predictable `random` module.
    length = min_length + secrets.randbelow(max_length - min_length + 1)
    alphabet = string.ascii_letters + string.digits

    return ''.join(secrets.choice(alphabet) for _ in range(length))


def generate_email(is_gmail: bool = True) -> str:
def create_pluralsight_account(credential_file_path: str) -> Dict[str, str]:
    """
    Creates new Pluralsight account and returns the email/password as a dictionary

    Args:
        credential_file_path: Path of the text file that receives the new credentials
            (email on the first line, password on the second); any existing content is replaced

    Returns:
        Dict[str, str]: Dictionary containing email and password with eponymous pair keys
    """

    disposable_email = DisposableMail()
    email_address = disposable_email.email_address
    password = get_password()

    with Pluralsight(email=email_address, password=password,
                     is_headless=HIDE_SELENIUM_INSTANCES) as ps:
        ps.register()

        verification_link = disposable_email.get_verification_link()

        ps.set_password(verification_link=verification_link)

        time.sleep(SLEEP_DURATION)

    with open(credential_file_path, 'w+') as account_file:
        account_file.write(f"{email_address}\n")
        account_file.write(f"{password}\n")

    # Return the address string, not the DisposableMail object, so the
    # result really is a Dict[str, str] as declared.
    return {'email': email_address, 'password': password}


def download_course(course_link: str, username: str, password: str) -> bool:
@@ -299,54 +289,156 @@ def download_course(course_link: str, username: str, password: str) -> bool:
ydl_options['username'] = username
ydl_options['password'] = password

if IS_WINDOWS:
ydl_options[
'outtmpl'] = f"{SAVE_DIRECTORY_PATH}\\%(playlist)s\\%(chapter_number)s - %(chapter)s\\%(playlist_index)s - %(title)s.%(ext)s"
else:
ydl_options[
'outtmpl'] = f"{SAVE_DIRECTORY_PATH}/%(playlist)s/%(chapter_number)s - %(chapter)s/%(playlist_index)s - %(title)s.%(ext)s"

with youtube_dl.YoutubeDL(ydl_options) as ydl:
ydl.download([course_link])

return True
except Exception as exception:
except:
return False


def main():
if not os.path.exists(COURSE_LINKS_FILE_PATH):
print(f"{COURSE_LINKS_FILE_PATH} NOT FOUND!")
return
def get_all_pluralsight_paths(url: str) -> List[PluralsightPath]:
    """
    Loads the JSON dump of Pluralsight paths and converts every record to a PluralsightPath

    Args:
        url: Where the JSON dump lives. A value starting with "http" is fetched
            over the network; anything else is treated as a local file path

    Returns:
        List[PluralsightPath]: One object per record found in the JSON dump

    Raises:
        requests.HTTPError: If the remote server answers with an error status
    """

    if url.startswith("http"):
        response = requests.get(url)
        # Fail here with a clear HTTP error instead of letting an error page
        # reach json.loads and surface as a confusing parse failure.
        response.raise_for_status()
        json_string = response.text
    else:
        # Read the file the caller asked for, not the module-level default
        json_string = Path(url).read_text(encoding="utf-8")

    path_records = json.loads(json_string)

    # Each record's keys are expected to match PluralsightPath's constructor parameters
    return [PluralsightPath(**path_record) for path_record in path_records]

create_pluralsight_account()
download_count = 0

print("SUCCESS! NEW PLURALSIGHT ACCOUNT CREATED.")
def print_pluralsight_paths_and_courses(pluralsight_paths_list: List[PluralsightPath],
                                        search_query: str) -> List[PluralsightPath]:
    """
    Prints every path whose title contains the search query (case-insensitive),
    followed by a numbered list of its course links

    Args:
        pluralsight_paths_list: All known paths to search through
        search_query: Text that must occur in a path's title

    Returns:
        List[PluralsightPath]: The paths that matched, in their original order
    """

    matching_paths = []
    for candidate in pluralsight_paths_list:
        if search_query.lower() in candidate.title.lower():
            print(f"{candidate.id} | {candidate.title}")

            # Serial 0 is reserved for "all courses"; real courses are numbered from 1
            print("\t0 - [DOWNLOAD ALL COURSES]")
            for serial, link in enumerate(candidate.course_links, start=1):
                print(f"\t{serial} - {link}")

            matching_paths.append(candidate)

    print("0 | [DOWNLOAD ALL PATHS]")

    return matching_paths


def get_directory_full_path(root_directory_path: str, pluralsight_path: PluralsightPath) -> str:
    """
    Builds the directory path for a Pluralsight path below the given root

    The directory is named "<id padded to 3 digits> - <label>", where the label
    is the slug on Windows and the title on every other platform.

    Args:
        root_directory_path: Directory under which the new directory name is placed
        pluralsight_path: The path whose id and slug/title form the directory name

    Returns:
        str: root_directory_path joined with the generated directory name
    """

    # presumably the slug is used on Windows because titles may contain
    # characters that are not valid in Windows file names - TODO confirm
    if IS_WINDOWS:
        label = pluralsight_path.slug
    else:
        label = pluralsight_path.title

    return os.path.join(root_directory_path, f"{pluralsight_path.id:03d} - {label}")


def download_all(all_pluralsight_courses: List[PluralsightPath]):
    """
    Placeholder for downloading everything; currently does nothing

    Args:
        all_pluralsight_courses: All known paths
    """
    # Todo Download all
    return None


def download_pluralsight_paths(pluralsight_paths: Optional[List[PluralsightPath]] = None):
    """
    Placeholder for downloading the given paths; currently does nothing

    Args:
        pluralsight_paths: Paths to download. The argument stays optional so that
            existing zero-argument calls keep working; the default is now None
            rather than the typing object that a stray "=" had made the default
    """
    pass


# region Click Prompts

def prompt_download_all() -> bool:
    """
    Asks the user whether everything should be downloaded

    Returns:
        bool: The user's answer; False when the default is accepted
    """
    return click.prompt("Download All? ", type=bool, default=False, show_default=True, prompt_suffix="")


def prompt_paths_and_courses_selection(all_pluralsight_paths_list: List[PluralsightPath]) -> UserSelection:
"""
Interactively asks for a search query, lists the matching paths and returns the user's choice

Args:
all_pluralsight_paths_list: All known paths; the search query filters this list

Returns:
UserSelection: All queried paths when path id 0 is chosen, the single chosen path
when course id 0 is chosen, otherwise the chosen path plus one course link.
A selection with is_exit=True is returned when the user confirms the "Exit" prompt
"""
while True:
try:
search_query = click.prompt("Search Query", type=str)

# Prints the matching paths with their courses and returns the matches
queried_paths = print_pluralsight_paths_and_courses(all_pluralsight_paths_list, search_query)

selected_paths = queried_paths
# Keep asking until the id is 0 or equals the id of one of the queried paths
while True:
path_id = click.prompt("Select Path (by id)", type=int, default=0, show_default=True)

# 0 (the default) selects every path that matched the query
if path_id == 0:
return UserSelection(selected_paths=queried_paths)

for path in queried_paths:
if path_id == path.id:
selected_paths = [path]
break
# presumably a for/else: no path matched the id, so ask again - confirm
# (the original indentation is not visible here)
else:
continue
break

# Keep asking until the course id lies between 0 and the number of course links
while True:
course_id = click.prompt("Select Course (by id)", type=int, default=0, show_default=True)
if 0 <= course_id <= len(selected_paths[0].course_links):
break

# 0 (the default) selects the whole path; otherwise ids are 1-based indexes into course_links
if course_id == 0:
return UserSelection(selected_paths=selected_paths)
return UserSelection(selected_paths, selected_paths[0].course_links[course_id - 1])

except Exception as e:
print(e)
# NOTE(review): download_count is never assigned inside this function; this
# finally/increment pair looks like residue from the previous main() - confirm and remove.
# It is also unclear whether the "Exit" prompt below belongs to the except
# branch or to finally - confirm against the real file.
finally:
download_count += 1
if click.prompt("Exit", default=False, show_default=True, type=bool):
return UserSelection([], is_exit=True)

# endregion


def get_credential() -> Tuple[str,str]:
    """
    Returns the stored Pluralsight credential, creating a new account first
    when the credential file does not exist yet

    Returns:
        Tuple[str, str]: Email (first line of the credential file) and
        password (second line), both without trailing whitespace
    """

    credential_missing = not os.path.exists(CREDENTIAL_FILE_PATH)
    if credential_missing:
        print("CREATING NEW PLURALSIGHT ACCOUNT")
        create_pluralsight_account(CREDENTIAL_FILE_PATH)
        print("SUCCESS! NEW PLURALSIGHT ACCOUNT CREATED.")

    with open(CREDENTIAL_FILE_PATH, 'r') as credential_file:
        stored_lines = credential_file.readlines()

    email_address = stored_lines[0].rstrip()
    password = stored_lines[1].rstrip()

    return email_address, password


def main():
    """
    Entry point: loads the paths dump, asks the user what to download and downloads it

    The user can download everything, every course of the selected path(s),
    or one single course. Downloads run once and then the function returns.
    """
    global SAVE_DIRECTORY_PATH

    all_pluralsight_paths_list = get_all_pluralsight_paths(JSON_FILE_URL)

    if prompt_download_all():
        download_all(all_pluralsight_paths_list)
        return

    user_selection = prompt_paths_and_courses_selection(all_pluralsight_paths_list)

    if user_selection.is_exit:
        return

    # The previous version wrapped everything below in "while True" without any
    # exit, so finished downloads were started over and over again.
    email_address, password = get_credential()

    if user_selection.selected_course_link:
        download_course(course_link=user_selection.selected_course_link, username=email_address, password=password)
        return

    # Remember the root once: deriving each path's directory from the previous
    # value of SAVE_DIRECTORY_PATH nested every path inside the one before it.
    root_save_directory = SAVE_DIRECTORY_PATH
    for path in user_selection.selected_paths:
        # download_course reads this global when it builds its output template
        SAVE_DIRECTORY_PATH = get_directory_full_path(root_save_directory, path)
        for course_link in path.course_links:
            download_course(course_link=course_link, username=email_address, password=password)


if __name__ == '__main__':

Loading…
Cancel
Save