"""Create disposable Pluralsight accounts and download courses with youtube-dl."""
import json
import os
import random
import re
import secrets
import string
import time
from pathlib import Path
from sys import platform
from typing import Dict, List, Optional, Tuple

import click
import requests
import youtube_dl
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.firefox.options import Options

# region Global Constant(s) and Readonly Variable(s)

# True runs the selenium browser headless (hidden); False shows the browser window
HIDE_SELENIUM_INSTANCES = False

# The maximum number of courses to download from a single account
MAX_COURSE_DOWNLOAD_COUNT = 5

# Duration (in seconds) of the time.sleep() pauses between selenium steps
SELENIUM_SLEEP_DURATION = 5

# Minimum number of seconds to wait between consecutive video downloads
MIN_VIDEO_DOWNLOAD_DELAY = 1

# Maximum number of seconds to wait between consecutive video downloads
MAX_VIDEO_DOWNLOAD_DELAY = 10

# Check if current OS/platform is Windows
IS_WINDOWS = platform.startswith("win")

# Master Directory Path (the directory that contains this script)
MASTER_DIRECTORY = os.path.dirname(os.path.realpath(__file__))

# Path of the text file where pluralsight account details will be stored
CREDENTIAL_FILE_PATH = os.path.join(MASTER_DIRECTORY, "credential.txt")

# Path of the archive text file to log all downloaded videos
ARCHIVE_FILE_PATH = os.path.join(MASTER_DIRECTORY, "archive.txt")

# Progress text file path (Progress file stores progress of download_all())
PROGRESS_FILE_PATH = os.path.join(MASTER_DIRECTORY, "progress.txt")

# Path of the directory where downloaded courses will be saved
SAVE_DIRECTORY_PATH = os.path.join(MASTER_DIRECTORY, "Courses")

# JSON File (.json) containing the JSON of the paths/courses dump of Pluralsight.
# A local file path also works here, e.g.:
# JSON_FILE_URL = os.path.join(MASTER_DIRECTORY, "pluralsight.json")
JSON_FILE_URL = "https://git.teknik.io/CanWePlsRapeTheShitOuttaPluralsight/RapePluralsight/raw/branch/master/pluralsight.json"

# Options for youtube-dl. For a complete list of options, check
# https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312
ydl_options = {
    'writesubtitles': True,
    'nooverwrites': True,
    'sleep_interval': MIN_VIDEO_DOWNLOAD_DELAY,
    'max_sleep_interval': MAX_VIDEO_DOWNLOAD_DELAY,
    'download_archive': ARCHIVE_FILE_PATH
}


# endregion


class PluralsightPath(object):
    """
    A Pluralsight path together with the links of the courses it contains
    """

    def __init__(self, id: int, link: str, title: str, slug: str, course_links: List[str]):
        # The parameter names (including the builtin-shadowing `id`) must match the
        # keys of the JSON dump, which is unpacked straight into this constructor.
        self.id = id
        self.link = link
        self.title = title
        self.slug = slug
        self.course_links = course_links


class UserSelection:
    """
    Outcome of the interactive prompt: the chosen paths, an optional single
    course link, and whether the user asked to exit
    """

    def __init__(self, selected_paths: List[PluralsightPath], selected_course_link: str = "",
                 is_exit: bool = False):
        self.selected_paths = selected_paths
        self.selected_course_link = selected_course_link
        self.is_exit = is_exit


class DisposableMail:
    """
    This class is used to generate random disposable emails
    """

    def __init__(self):
        self.email_address = requests.post("https://gmailnator.com/index/indexquery",
                                           {'action': 'GenerateEmail'}).text

    def get_mail_item_id(self) -> str:
        """
        Polls the mailbox once per second until a mail item id can be extracted

        Returns:
            The id of the first mail item found in the mailbox listing
        """
        post_url = "https://gmailnator.com/mailbox/mailboxquery"
        post_data = {
            'action': 'LoadMailList',
            'Email_address': self.email_address
        }

        while True:
            time.sleep(1)
            try:
                response_text = requests.post(post_url, post_data).json()[0]['content']
                return re.findall('#(.*)\\">', response_text)[0]
            except (requests.RequestException, ValueError, LookupError, TypeError):
                # Mail not delivered yet (or a transient request/parse failure): keep polling.
                # KeyboardInterrupt is deliberately NOT swallowed so the loop can be aborted.
                continue

    def get_verification_link(self) -> str:
        """
        Fetches the first mail of the mailbox and extracts the password reset link

        Returns:
            The Pluralsight password reset (verification) link

        Raises:
            RuntimeError: If the mail does not contain a reset link
        """
        post_url = "https://gmailnator.com/mailbox/get_single_message/"
        post_data = {
            'action': 'LoadMailList',
            'message_id': self.get_mail_item_id(),
            'email': self.email_address.split("+")[0]
        }

        response_data = requests.post(post_url, post_data).text
        # Name the parser explicitly so the result does not depend on which parsers are installed
        soup = BeautifulSoup(response_data, "html.parser")

        for link in soup.find_all('a', href=True):
            if "https://app.pluralsight.com/id/forgotpassword/reset?token" in link['href']:
                return link['href']

        raise RuntimeError("No Pluralsight verification link found in the received mail")


class Pluralsight:
    """
    This class handles the registration and verification of new Pluralsight accounts
    """

    def __init__(self, email: str, password: str, is_headless: bool = True):
        options = Options()
        if is_headless:
            options.add_argument("--headless")

        self.driver = webdriver.Firefox(options=options)
        self.email = email
        self.password = password

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always close the browser, even if registration failed midway
        self.driver.quit()

    @staticmethod
    def get_name() -> Tuple[str, str]:
        """
        Fetches a random name from names.drycodes.com and splits it on underscores

        Returns:
            Tuple[str, str]: First and last part of the fetched name
        """
        name_option = "boy_names" if random.choice([True, False]) else "girl_names"
        request_url = f"http://names.drycodes.com/1?nameOptions={name_option}"

        name_parts = requests.get(request_url).text.strip('"[]').split('_')

        # Always hand back exactly two items so callers can safely unpack the result
        return name_parts[0], name_parts[-1]

    def register(self) -> None:
        """
        Registers new Pluralsight account
        """
        # NOTE(review): the find_element_by_* helpers used below were removed in
        # Selenium 4.3; confirm the pinned selenium version before upgrading.
        self.driver.get("https://www.pluralsight.com/offer/2020/free-april-month")
        time.sleep(SELENIUM_SLEEP_DURATION)

        accept_cookie_button_element = self.driver.find_element_by_class_name("cookie_notification--opt_in")
        accept_cookie_button_element.click()
        time.sleep(1)

        sign_up_now_button_element = self.driver.find_element_by_xpath('//a[@data-aa-title="Free-April-Start-Now"]')
        sign_up_now_button_element.click()
        time.sleep(1)

        email_input_element = self.driver.find_element_by_name("email")
        firstname_input_element = self.driver.find_element_by_name("firstname")
        lastname_input_element = self.driver.find_element_by_name("lastname")
        tos_checkbox_element = self.driver.find_element_by_name("optInBox")

        first_name, last_name = self.get_name()

        email_input_element.send_keys(self.email)
        firstname_input_element.send_keys(first_name)
        lastname_input_element.send_keys(last_name)
        tos_checkbox_element.click()
        time.sleep(SELENIUM_SLEEP_DURATION)

        create_account_button_element = self.driver.find_element_by_xpath(
            "//*[contains(text(), 'I agree, activate benefit')]")
        create_account_button_element.click()
        time.sleep(30)

        cancel_button_element = self.driver.find_element_by_class_name("cancelButton---CKAut")
        cancel_button_element.click()
        time.sleep(SELENIUM_SLEEP_DURATION)

    def set_password(self, verification_link: str) -> None:
        """
        Sets password in the given verification link

        Args:
            verification_link: The verification link (as string) to set up password
        """
        self.driver.get(verification_link)
        time.sleep(SELENIUM_SLEEP_DURATION)

        password_input_element = self.driver.find_element_by_id("Password")
        password_confirm_input_element = self.driver.find_element_by_id("PasswordConfirmation")
        save_button_element = self.driver.find_element_by_class_name("psds-button--appearance-primary")

        password_input_element.send_keys(self.password)
        password_confirm_input_element.send_keys(self.password)
        time.sleep(1)

        save_button_element.click()
        time.sleep(SELENIUM_SLEEP_DURATION)


class Progress:
    """
    Holder for an id, a course link (initially empty) and an is_all flag
    """

    # NOTE(review): this class is not referenced anywhere else in this file.
    def __init__(self, id: int, is_all: bool):
        self.id = id
        self.course_link = ""
        self.is_all = is_all


def get_password(min_length: int = 25, max_length: int = 50) -> str:
    """
    Generates a random password using ascii letters and numerical digits

    Args:
        min_length: Minimum length of the password, default is 25
        max_length: Maximum length of the password, default is 50

    Returns:
        Generated password as string
    """
    length = random.randint(min_length, max_length)
    alphabet = string.ascii_letters + string.digits

    return ''.join(secrets.choice(alphabet) for _ in range(length))


def create_pluralsight_account(credential_file_path: str) -> Dict[str, str]:
    """
    Creates new Pluralsight account, stores its credential and returns the email/password as a dictionary

    Args:
        credential_file_path: Path of the text file to write the email (line 1) and password (line 2) to

    Returns:
        Dict[str, str]: Dictionary containing email and password with eponymous pair keys
    """
    disposable_email = DisposableMail()
    password = get_password()

    with Pluralsight(email=disposable_email.email_address, password=password,
                     is_headless=HIDE_SELENIUM_INSTANCES) as ps:
        ps.register()
        verification_link = disposable_email.get_verification_link()
        ps.set_password(verification_link=verification_link)
        time.sleep(SELENIUM_SLEEP_DURATION)

    with open(credential_file_path, 'w+') as account_file:
        account_file.write(f"{disposable_email.email_address}\n")
        account_file.write(f"{password}\n")

    # Return the address string itself, not the DisposableMail wrapper object
    return {'email': disposable_email.email_address, 'password': password}


def download_course(course_link: str, username: str, password: str, save_directory_path: str) -> bool:
    """
    Download the given course using the provided credential

    Args:
        course_link: The link of the course to download
        username: Username (Email) of the Pluralsight account to be used for download
        password: Password of the Pluralsight account to be used for download
        save_directory_path: Absolute path of Root save directory

    Returns:
        True/False bool value denoting the success status of the download
    """
    # os.path.join yields the platform's own separator, so no per-OS template is needed
    output_template = os.path.join(save_directory_path,
                                   "%(playlist)s",
                                   "%(chapter_number)s - %(chapter)s",
                                   "%(playlist_index)s - %(title)s.%(ext)s")

    # Work on a copy so the credential does not leak into the shared module-level options
    options = dict(ydl_options, username=username, password=password, outtmpl=output_template)

    try:
        with youtube_dl.YoutubeDL(options) as ydl:
            ydl.download([course_link])
    except Exception as exception:
        # Report failure through the return value; Ctrl+C is no longer swallowed here
        print(f"Failed to download {course_link}: {exception}")
        return False

    return True


def get_all_pluralsight_paths(url: str) -> List[PluralsightPath]:
    """
    Loads the Pluralsight paths dump from a URL or from a local JSON file

    Args:
        url: http(s) URL of the JSON dump, or the path of a local JSON file

    Returns:
        List of PluralsightPath objects built from the JSON entries
    """
    if url.startswith("http"):
        response = requests.get(url)
        # Fail with a clear HTTP error instead of a confusing JSON parse error
        response.raise_for_status()
        json_string = response.text
    else:
        # Read the file that was actually requested (not the global default)
        json_string = Path(url).read_text()

    return [PluralsightPath(**path_dict) for path_dict in json.loads(json_string)]


def print_pluralsight_paths_and_courses(pluralsight_paths_list: List[PluralsightPath],
                                        search_query: str) -> List[PluralsightPath]:
    """
    Prints every path whose title contains the search query (case-insensitive) along with its courses

    Args:
        pluralsight_paths_list: All the available paths
        search_query: Text to look for in the path titles

    Returns:
        List of the paths that matched the query
    """
    lowered_query = search_query.lower()
    queried_paths_list = []

    for pluralsight_path in pluralsight_paths_list:
        if lowered_query not in pluralsight_path.title.lower():
            continue

        print(f"{pluralsight_path.id} | {pluralsight_path.title}")
        print("\t0 - [DOWNLOAD ALL COURSES]")
        for course_serial, course_link in enumerate(pluralsight_path.course_links, start=1):
            print(f"\t{course_serial} - {course_link}")

        queried_paths_list.append(pluralsight_path)

    print("0 | [DOWNLOAD ALL PATHS]")

    return queried_paths_list


def get_directory_full_path(root_directory_path: str, pluralsight_path: PluralsightPath) -> str:
    """
    Builds the save directory of a path: "<zero-padded id> - <slug on Windows, title otherwise>"

    Args:
        root_directory_path: Directory under which the path directory is placed
        pluralsight_path: The path to build the directory name for

    Returns:
        Full path of the directory
    """
    label = pluralsight_path.slug if IS_WINDOWS else pluralsight_path.title
    directory_name = f"{pluralsight_path.id:03d} - {label}"

    return os.path.join(root_directory_path, directory_name)


def download_pluralsight_paths(pluralsight_paths: Optional[List[PluralsightPath]] = None):
    """
    Placeholder; not implemented yet

    Args:
        pluralsight_paths: The paths to download (optional so existing no-argument calls keep working)
    """
    pass


# region Click Prompts

def prompt_download_all() -> bool:
    """
    Asks the user whether everything should be downloaded

    Returns:
        The user's answer, False by default
    """
    return click.prompt("Download All? ", default=False, show_default=True, type=bool, prompt_suffix="")


def prompt_paths_and_courses_selection(all_pluralsight_paths_list: List[PluralsightPath]) -> UserSelection:
    """
    Interactively asks for a search query, then a path id, then a course number

    Entering 0 as path id selects every path that matched the query; entering 0 as
    course number selects every course of the chosen path.

    Args:
        all_pluralsight_paths_list: All the available paths

    Returns:
        UserSelection describing the chosen paths/course, or one with is_exit=True
    """
    while True:
        try:
            search_query = click.prompt("Search Query", type=str)
            queried_paths = print_pluralsight_paths_and_courses(all_pluralsight_paths_list, search_query)

            if not queried_paths:
                # Nothing to choose from: ask for another query instead of an unanswerable id
                print("No path matched the search query")
                continue

            # Keep asking until the id is 0 (all matches) or belongs to a matched path
            while True:
                path_id = click.prompt("Select Path (by id)", type=int, default=0, show_default=True)
                if path_id == 0:
                    return UserSelection(selected_paths=queried_paths)

                selected_path = next((path for path in queried_paths if path.id == path_id), None)
                if selected_path is not None:
                    break

            # Keep asking until the course number is 0 (all courses) or a valid 1-based serial
            course_count = len(selected_path.course_links)
            while True:
                course_id = click.prompt("Select Course (by id)", type=int, default=0, show_default=True)
                if 0 <= course_id <= course_count:
                    break

            if course_id == 0:
                return UserSelection(selected_paths=[selected_path])

            return UserSelection([selected_path], selected_path.course_links[course_id - 1])
        except Exception as e:
            print(e)
            if click.prompt("Exit", default=False, show_default=True, type=bool):
                return UserSelection([], is_exit=True)


# endregion


def get_credential() -> Tuple[str, str]:
    """
    Reads the stored credential, creating a new Pluralsight account first if none is stored

    Returns:
        Tuple[str, str]: Email and password read from the credential file
    """
    if not os.path.exists(CREDENTIAL_FILE_PATH):
        print("CREATING NEW PLURALSIGHT ACCOUNT")
        create_pluralsight_account(CREDENTIAL_FILE_PATH)
        print("SUCCESS! NEW PLURALSIGHT ACCOUNT CREATED.")

    with open(CREDENTIAL_FILE_PATH, 'r') as account_file:
        lines = account_file.readlines()

    # First line holds the email, second line the password
    return lines[0].rstrip(), lines[1].rstrip()


def save_progress(path_id: int, course_index: int):
    """
    Overwrites the progress file with "<path_id>|<course_index>"

    Args:
        path_id: Id of the path currently being downloaded
        course_index: Index of the next course to download in that path
    """
    with open(PROGRESS_FILE_PATH, 'w') as progress_file:
        progress_file.write(f"{path_id}|{course_index}")


def download_all(all_pluralsight_paths: List[PluralsightPath]):
    """
    Downloads every course of every path, resuming from the position stored in the progress file

    Any error (including a failed course download) is printed and stops the run; the
    progress file then still points at the course that has to be retried.

    Args:
        all_pluralsight_paths: All the available paths
    """
    try:
        if not os.path.isfile(PROGRESS_FILE_PATH):
            save_progress(path_id=1, course_index=0)

        current_path_id, current_course_index = map(int, Path(PROGRESS_FILE_PATH).read_text().rstrip().split("|"))

        # NOTE(review): the loop bound assumes path ids run from 1 to len(all_pluralsight_paths)
        while current_path_id <= len(all_pluralsight_paths):
            email, password = get_credential()

            pluralsight_path = next((path for path in all_pluralsight_paths if path.id == current_path_id), None)

            # An id without a matching path is skipped instead of looping on it forever
            if pluralsight_path is not None:
                save_directory_path = get_directory_full_path(SAVE_DIRECTORY_PATH, pluralsight_path)

                while current_course_index < len(pluralsight_path.course_links):
                    course_link = pluralsight_path.course_links[current_course_index]

                    if not download_course(course_link, email, password, save_directory_path):
                        raise Exception("Failed to download course")

                    current_course_index += 1
                    save_progress(current_path_id, current_course_index)

            # Move on to the next path and start again from its first course
            current_path_id += 1
            current_course_index = 0
            save_progress(current_path_id, current_course_index)
    except Exception as exception:
        print(exception)


def main():
    """
    Entry point: loads the paths dump, asks the user what to download and downloads it
    """
    all_pluralsight_paths_list = get_all_pluralsight_paths(JSON_FILE_URL)

    if prompt_download_all():
        download_all(all_pluralsight_paths_list)
        return

    user_selection = prompt_paths_and_courses_selection(all_pluralsight_paths_list)
    if user_selection.is_exit:
        return

    email_address, password = get_credential()

    if user_selection.selected_course_link:
        # A single course is always selected together with exactly one path
        save_directory_path = get_directory_full_path(SAVE_DIRECTORY_PATH, user_selection.selected_paths[0])
        download_course(course_link=user_selection.selected_course_link,
                        username=email_address,
                        password=password,
                        save_directory_path=save_directory_path)
        return

    for path in user_selection.selected_paths:
        save_directory_path = get_directory_full_path(SAVE_DIRECTORY_PATH, path)
        for course_link in path.course_links:
            download_course(course_link=course_link,
                            username=email_address,
                            password=password,
                            save_directory_path=save_directory_path)


if __name__ == '__main__':
    main()