You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
504 lines
17 KiB
504 lines
17 KiB
import json
import os
import random
import re
import secrets
import string
import time
from pathlib import Path
from sys import platform
from typing import Dict, List, Tuple

import click
import requests
import youtube_dl
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
|
|
|
# region Global Constant(s) and Readonly Variable(s)

# True to run the selenium browser instances hidden (headless), False to show them
HIDE_SELENIUM_INSTANCES = True

# The maximum number of courses to download from a single account
# NOTE(review): not referenced by any code visible in this file - confirm it is still needed
MAX_COURSE_DOWNLOAD_COUNT = 5

# Duration (in seconds) of the time.sleep() pauses between selenium page interactions
SELENIUM_SLEEP_DURATION = 5

# Minimum number of seconds to wait between consecutive video downloads
MIN_VIDEO_DOWNLOAD_DELAY = 1

# Maximum number of seconds to wait between consecutive video downloads
MAX_VIDEO_DOWNLOAD_DELAY = 10

# Check if current OS/platform is Windows
IS_WINDOWS = platform.startswith("win")

# Master Directory Path: the directory that contains this script (symlinks resolved)
MASTER_DIRECTORY = os.path.dirname(os.path.realpath(__file__))

# Path of the text file where pluralsight account details will be stored
CREDENTIAL_FILE_PATH = os.path.join(MASTER_DIRECTORY, "credential.txt")

# Path of the archive text file to log all downloaded videos
ARCHIVE_FILE_PATH = os.path.join(MASTER_DIRECTORY, "archive.txt")

# Progress text file path (Progress file stores progress of download_all())
PROGRESS_FILE_PATH = os.path.join(MASTER_DIRECTORY, "progress.txt")

# Path of the directory where downloaded courses will be saved
SAVE_DIRECTORY_PATH = os.path.join(MASTER_DIRECTORY, "Courses")

# JSON File (.json) containing the JSON of the paths/courses dump of Pluralsight
# (despite the name, this is a local file path inside MASTER_DIRECTORY, not a URL)
JSON_FILE_URL = os.path.join(MASTER_DIRECTORY, "pluralsight.json")

# Options for youtube-dl. For a complete list of options, check https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312
ydl_options = {
    'writesubtitles': True,
    'nooverwrites': True,
    'sleep_interval': MIN_VIDEO_DOWNLOAD_DELAY,
    'max_sleep_interval': MAX_VIDEO_DOWNLOAD_DELAY,
    'download_archive': ARCHIVE_FILE_PATH
}

# endregion
|
|
|
|
|
class PluralsightPath:
    """
    Record describing one Pluralsight path and the links of its courses

    Attributes mirror the constructor arguments one-to-one: id, link, title, slug, course_links
    """

    def __init__(self, id: int, link: str, title: str, slug: str, course_links: List[str]):
        # The parameter names double as keyword names for callers, so they are kept exactly
        self.id, self.link, self.title = id, link, title
        self.slug, self.course_links = slug, course_links
|
|
|
|
|
class UserSelection:
    """
    Outcome of the interactive path/course selection

    Attributes:
        selected_paths: The paths the user picked
        selected_course_link: Link of a single picked course, empty string when none was picked
        is_exit: True when the user asked to leave instead of selecting anything
    """

    def __init__(self, selected_paths: List[PluralsightPath], selected_course_link: str = "", is_exit: bool = False):
        self.is_exit = is_exit
        self.selected_course_link = selected_course_link
        self.selected_paths = selected_paths
|
|
|
|
|
class DisposableMail:
    """
    This class is used to generate random disposable emails (through gmailnator.com)
    and to read the Pluralsight password-reset link sent to them
    """

    def __init__(self):
        # The service answers with the generated address as the plain response body
        self.email_address = requests.post("https://gmailnator.com/index/indexquery", {'action': 'GenerateEmail'}).text

    def get_mail_item_id(self) -> str:
        """
        Polls the mailbox once per second until a message id can be extracted

        This call blocks until the mailbox listing contains a parsable entry.

        Returns:
            str: Id of the first message found in the mailbox listing
        """

        post_url = "https://gmailnator.com/mailbox/mailboxquery"
        post_data = {
            'action': 'LoadMailList',
            'Email_address': self.email_address
        }

        while True:
            time.sleep(1)

            try:
                response_text = requests.post(post_url, post_data).json()[0]['content']
                return re.findall(r'#(.*)\">', response_text)[0]
            except Exception:
                # Best-effort polling: an empty mailbox or an unexpected response just means
                # "try again". KeyboardInterrupt/SystemExit are deliberately NOT caught here so
                # the user can still abort the wait.
                continue

    def get_verification_link(self) -> str:
        """
        Fetches the received message and extracts the Pluralsight password-reset link from it

        Returns:
            str: The href of the first anchor pointing to the Pluralsight reset page

        Raises:
            RuntimeError: If the message contains no such link
        """

        post_url = "https://gmailnator.com/mailbox/get_single_message/"
        post_data = {
            'action': 'LoadMailList',
            'message_id': self.get_mail_item_id(),
            # Only the part of the address before any "+" suffix is sent
            'email': self.email_address.split("+")[0]
        }

        response_data = requests.post(post_url, post_data).text

        # Name the parser explicitly so the result does not depend on which optional
        # parser libraries happen to be installed
        soup = BeautifulSoup(response_data, "html.parser")
        for link in soup.find_all('a', href=True):
            if "https://app.pluralsight.com/id/forgotpassword/reset?token" in link['href']:
                return link['href']

        raise RuntimeError("No Pluralsight verification link found in the received mail")
|
|
|
|
|
class Pluralsight:
    """
    This class handles the registration and verification of new Pluralsight accounts

    It drives a Firefox instance through selenium and is meant to be used as a context
    manager so that the browser is always closed on exit.
    """

    def __init__(self, email: str, password: str, is_headless: bool = True):
        """
        Args:
            email: Email address to register the account with
            password: Password to set for the account
            is_headless: When True the Firefox instance is started with "--headless"
        """
        options = Options()
        if is_headless:
            options.add_argument("--headless")
        self.driver = webdriver.Firefox(options=options)

        self.email = email
        self.password = password

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always close the browser; exceptions are not suppressed
        self.driver.quit()

    @staticmethod
    def get_name() -> Tuple[str, str]:
        """
        Fetch a random boy or girl name from names.drycodes.com

        Returns:
            Tuple[str, str]: The first name and the last name

        Raises:
            ValueError: If the response cannot be split into a first and a last name
        """

        name_options = "boy_names" if random.choice([True, False]) else "girl_names"
        request_url = f"http://names.drycodes.com/1?nameOptions={name_options}"

        # The response text is stripped of quotes/brackets and split on "_"
        name_parts = requests.get(request_url).text.strip('"[]').split('_')
        if len(name_parts) < 2:
            raise ValueError(f"Unexpected response from name service: {name_parts!r}")

        # Anything after the first part is treated as the last name
        return name_parts[0], " ".join(name_parts[1:])

    def register(self) -> None:
        """
        Registers new Pluralsight account

        Opens the offer page, accepts the cookie notice, fills the sign-up form with
        the instance email and a random name, submits it and finally clicks the
        cancel button on the page that follows.
        """

        self.driver.get("https://www.pluralsight.com/offer/2020/free-april-month")
        time.sleep(SELENIUM_SLEEP_DURATION)

        accept_cookie_button_element = self.driver.find_element(By.CLASS_NAME, "cookie_notification--opt_in")
        accept_cookie_button_element.click()

        time.sleep(1)

        sign_up_now_button_element = self.driver.find_element(By.XPATH, '//a[@data-aa-title="Free-April-Start-Now"]')
        sign_up_now_button_element.click()

        time.sleep(1)

        email_input_element = self.driver.find_element(By.NAME, "email")
        firstname_input_element = self.driver.find_element(By.NAME, "firstname")
        lastname_input_element = self.driver.find_element(By.NAME, "lastname")
        tos_checkbox_element = self.driver.find_element(By.NAME, "optInBox")

        first_name, last_name = self.get_name()

        email_input_element.send_keys(self.email)
        firstname_input_element.send_keys(first_name)
        lastname_input_element.send_keys(last_name)
        tos_checkbox_element.click()

        time.sleep(SELENIUM_SLEEP_DURATION)

        create_account_button_element = self.driver.find_element(
            By.XPATH, "//*[contains(text(), 'I agree, activate benefit')]")
        create_account_button_element.click()

        # Fixed long wait after submitting the form
        # NOTE(review): presumably gives the account creation time to finish - confirm
        time.sleep(30)

        cancel_button_element = self.driver.find_element(By.CLASS_NAME, "cancelButton---CKAut")
        cancel_button_element.click()

        time.sleep(SELENIUM_SLEEP_DURATION)

    def set_password(self, verification_link: str) -> None:
        """
        Sets password in the given verification link

        Args:
            verification_link: The verification link (as string) to set up password
        """

        self.driver.get(verification_link)
        time.sleep(SELENIUM_SLEEP_DURATION)

        password_input_element = self.driver.find_element(By.ID, "Password")
        password_confirm_input_element = self.driver.find_element(By.ID, "PasswordConfirmation")
        save_button_element = self.driver.find_element(By.CLASS_NAME, "psds-button--appearance-primary")

        password_input_element.send_keys(self.password)
        password_confirm_input_element.send_keys(self.password)

        time.sleep(1)

        save_button_element.click()

        time.sleep(SELENIUM_SLEEP_DURATION)
|
|
|
|
|
class Progress:
    """
    Small record holding a download position

    Attributes:
        id: Identifier given at construction
        course_link: Always starts out as an empty string
        is_all: Flag given at construction
    """

    def __init__(self, id: int, is_all: bool):
        self.course_link = ""
        self.id, self.is_all = id, is_all
|
|
|
|
|
def get_password(min_length: int = 25, max_length: int = 50) -> str:
    """
    Generates a random password using ascii letters and numerical digits

    Args:
        min_length: Minimum length of the password, default is 25
        max_length: Maximum length of the password, default is 50

    Returns: Generated password as string

    Raises:
        ValueError: If min_length is greater than max_length
    """

    if min_length > max_length:
        raise ValueError("min_length must not be greater than max_length")

    # Draw the length from the same cryptographically secure source as the characters,
    # so that no part of the password depends on the predictable `random` generator
    length = min_length + secrets.randbelow(max_length - min_length + 1)
    alphabet = string.ascii_letters + string.digits

    return ''.join(secrets.choice(alphabet) for _ in range(length))
|
|
|
|
|
def create_pluralsight_account(credential_file_path: str) -> Dict[str, str]:
    """
    Creates new Pluralsight account and returns the email/password as a dictionary

    The email and password are also written to the credential file, one per line
    (email first), but only after registration and password setup raised no error.

    Args:
        credential_file_path: Path of the text file the new credential is written to

    Returns:
        Dict[str, str]: Dictionary containing email and password with eponymous pair keys
    """

    disposable_email = DisposableMail()
    email_address = disposable_email.email_address
    password = get_password()

    with Pluralsight(email=email_address, password=password,
                     is_headless=HIDE_SELENIUM_INSTANCES) as ps:
        ps.register()

        verification_link = disposable_email.get_verification_link()

        ps.set_password(verification_link=verification_link)

        time.sleep(SELENIUM_SLEEP_DURATION)

    with open(credential_file_path, 'w+') as account_file:
        account_file.write(f"{email_address}\n")
        account_file.write(f"{password}\n")

    # Return the address string (not the DisposableMail object) to honour Dict[str, str]
    return {'email': email_address, 'password': password}
|
|
|
|
|
def download_course(course_link: str, username: str, password: str, save_directory_path: str) -> bool:
    """
    Download the given course using the provided credential

    Args:
        course_link: The link of the course to download
        username: Username (Email) of the Pluralsight account to be used for download
        password: Password of the Pluralsight account to be used for download
        save_directory_path: Absolute path of Root save directory

    Returns: True/False bool value denoting the success status of the download
    """

    # Work on a per-call copy so credentials and the output template never leak into
    # the shared module-level option dictionary
    options = dict(ydl_options)
    options['username'] = username
    options['password'] = password

    # os.path.join picks the separator of the current platform
    options['outtmpl'] = os.path.join(
        save_directory_path,
        "%(playlist)s",
        "%(chapter_number)s - %(chapter)s",
        "%(playlist_index)s - %(title)s.%(ext)s",
    )

    try:
        with youtube_dl.YoutubeDL(options) as ydl:
            ydl.download([course_link])
    except Exception:
        # Any download error counts as a failed download. KeyboardInterrupt/SystemExit
        # are intentionally not caught so callers can still react to them.
        return False

    return True
|
|
|
|
|
def get_all_pluralsight_paths(url: str) -> List[PluralsightPath]:
    """
    Loads the JSON dump of Pluralsight paths and converts it to PluralsightPath objects

    Args:
        url: Location of the JSON dump. Values starting with "http" are fetched over
            the network, anything else is read as a local file path

    Returns:
        List[PluralsightPath]: One object per entry of the JSON list; every entry is
        passed to the PluralsightPath constructor as keyword arguments
    """

    if url.startswith("http"):
        response = requests.get(url)
        # Fail early on HTTP errors instead of trying to parse an error page as JSON
        response.raise_for_status()
        json_string = response.text
    else:
        # Read the file that was asked for, not a hard-coded global path
        json_string = Path(url).read_text()

    return [PluralsightPath(**path_dict) for path_dict in json.loads(json_string)]
|
|
|
|
|
def print_pluralsight_paths_and_courses(pluralsight_paths_list: List[PluralsightPath],
                                        search_query: str) -> List[PluralsightPath]:
    """
    Prints every path whose title contains the search query, together with its courses

    The match is a case-insensitive substring test on the path title. Courses are
    numbered from 1; entry 0 stands for "all courses" / "all paths".

    Args:
        pluralsight_paths_list: The paths to search in
        search_query: Text to look for in the path titles

    Returns:
        List[PluralsightPath]: The matching paths, in their original order
    """

    needle = search_query.lower()
    matches = []

    for candidate in pluralsight_paths_list:
        if needle in candidate.title.lower():
            print(f"{candidate.id} | {candidate.title}")
            print("\t0 - [DOWNLOAD ALL COURSES]")

            for serial, link in enumerate(candidate.course_links, start=1):
                print(f"\t{serial} - {link}")

            matches.append(candidate)

    print("0 | [DOWNLOAD ALL PATHS]")

    return matches
|
|
|
|
|
def get_directory_full_path(root_directory_path: str, pluralsight_path: PluralsightPath) -> str:
    """
    Builds the save directory of a path inside the given root directory

    The directory is named "<id padded to 3 digits> - <label>", where the label is
    the slug on Windows and the title everywhere else.

    Args:
        root_directory_path: Directory the path directory is placed in
        pluralsight_path: The path to build the directory name for

    Returns:
        str: root_directory_path joined with the directory name
    """

    if IS_WINDOWS:
        label = pluralsight_path.slug
    else:
        label = pluralsight_path.title

    return os.path.join(root_directory_path, f"{pluralsight_path.id:03d} - {label}")
|
|
|
|
|
def download_pluralsight_paths(pluralsight_paths=List[PluralsightPath]):
    # Placeholder: this function has no implementation yet and does nothing.
    # NOTE(review): `=` makes List[PluralsightPath] the DEFAULT VALUE of the parameter
    # rather than its annotation - presumably `:` was intended; confirm before implementing.
    pass
|
|
|
|
|
# region Click Prompts |
|
|
|
def prompt_download_all() -> bool:
    """
    Asks the user whether everything should be downloaded

    Returns: The user's answer as bool (the shown default is False)
    """
    return click.prompt(
        "Download All? ",
        default=False,
        show_default=True,
        type=bool,
        prompt_suffix="",
    )
|
|
|
|
|
def prompt_paths_and_courses_selection(all_pluralsight_paths_list: List[PluralsightPath]) -> UserSelection:
    """
    Interactively asks for a search query, then a path and finally a course

    Entering 0 as path id selects every path listed for the query; entering 0 as
    course id selects the whole chosen path. When anything raises, the error is
    printed and the user may either exit or start over with a new search query.

    Args:
        all_pluralsight_paths_list: All known paths the user can search in

    Returns:
        UserSelection: The chosen paths (and course link, if a single course was
        chosen), or an empty selection flagged with is_exit=True
    """

    while True:
        try:
            search_query = click.prompt("Search Query", type=str)
            queried_paths = print_pluralsight_paths_and_courses(all_pluralsight_paths_list, search_query)

            # Ask until the answer is 0 (all listed paths) or the id of a listed path
            chosen_path = None
            while chosen_path is None:
                path_id = click.prompt("Select Path (by id)", type=int, default=0, show_default=True)

                if path_id == 0:
                    return UserSelection(selected_paths=queried_paths)

                chosen_path = next((path for path in queried_paths if path_id == path.id), None)

            selected_paths = [chosen_path]
            course_links = chosen_path.course_links

            # Ask until the answer is 0 (whole path) or a valid 1-based course number
            while True:
                course_id = click.prompt("Select Course (by id)", type=int, default=0, show_default=True)
                if 0 <= course_id <= len(course_links):
                    break

            if course_id == 0:
                return UserSelection(selected_paths=selected_paths)

            return UserSelection(selected_paths, course_links[course_id - 1])

        except Exception as e:
            print(e)
            wants_exit = click.prompt("Exit", default=False, show_default=True, type=bool)
            if wants_exit:
                return UserSelection([], is_exit=True)
|
|
|
|
|
# endregion |
|
|
|
|
|
def get_credential() -> Tuple[str, str]:
    """
    Reads the stored Pluralsight credential, creating a new account first if needed

    A new account is created only when the credential file does not exist yet.

    Returns:
        Tuple[str, str]: The first and second line of the credential file with
        trailing whitespace removed (email, password)
    """

    credential_missing = not os.path.exists(CREDENTIAL_FILE_PATH)
    if credential_missing:
        print("CREATING NEW PLURALSIGHT ACCOUNT")
        create_pluralsight_account(CREDENTIAL_FILE_PATH)
        print("SUCCESS! NEW PLURALSIGHT ACCOUNT CREATED.")

    with open(CREDENTIAL_FILE_PATH, 'r') as account_file:
        stored_lines = account_file.readlines()

    email_line, password_line = stored_lines[0], stored_lines[1]

    return email_line.rstrip(), password_line.rstrip()
|
|
|
|
|
def save_progress(path_id: int, course_index: int):
    """
    Overwrites the progress file with the given position

    Args:
        path_id: Id of the path that is currently being downloaded
        course_index: Index of the next course to download inside that path
    """
    progress_record = f"{path_id}|{course_index}"

    with open(PROGRESS_FILE_PATH, 'w+') as progress_file:
        progress_file.write(progress_record)
|
|
|
|
|
def download_all(all_pluralsight_paths: List[PluralsightPath]):
    """
    Downloads every course of every path, resuming from the progress file

    The progress file holds "<path id>|<course index>" and is updated after every
    downloaded course, so an interrupted run continues where it stopped. When a
    download fails, the stored credential is discarded (the next get_credential()
    call then creates a new account) and the run resumes from the saved progress.

    Args:
        all_pluralsight_paths: All paths to download; path ids are walked from the
            saved position up to len(all_pluralsight_paths)
    """

    # First occurrence wins, matching a front-to-back search through the list
    paths_by_id = {}
    for pluralsight_path in all_pluralsight_paths:
        paths_by_id.setdefault(pluralsight_path.id, pluralsight_path)

    while True:
        try:
            if not os.path.isfile(PROGRESS_FILE_PATH):
                save_progress(path_id=1, course_index=0)

            current_path_id, current_course_index = map(int, Path(PROGRESS_FILE_PATH).read_text().rstrip().split("|"))

            while current_path_id <= len(all_pluralsight_paths):
                email, password = get_credential()

                # An id without a matching path is skipped instead of stalling the loop
                pluralsight_path = paths_by_id.get(current_path_id)
                if pluralsight_path is not None:
                    save_directory_path = get_directory_full_path(SAVE_DIRECTORY_PATH, pluralsight_path)

                    while current_course_index < len(pluralsight_path.course_links):
                        course_link = pluralsight_path.course_links[current_course_index]
                        download_result = download_course(course_link, email, password, save_directory_path)

                        if not download_result:
                            raise Exception("Failed to download course")

                        current_course_index += 1
                        save_progress(current_path_id, current_course_index)

                current_path_id += 1
                current_course_index = 0
                save_progress(current_path_id, current_course_index)

            # Every path up to the last id has been processed
            break

        except (KeyboardInterrupt, SystemExit):
            print("EXITING PROGRAM")
            break
        except Exception as exception:
            # The credential file may not exist (e.g. account creation itself failed);
            # removing it unconditionally would raise from inside this handler
            if os.path.exists(CREDENTIAL_FILE_PATH):
                os.remove(CREDENTIAL_FILE_PATH)
            print(exception)
|
|
|
|
|
def main():
    """
    Entry point: loads the path list, asks the user what to download and downloads it

    Either everything is downloaded through download_all(), or the user selects
    paths / a single course, which are then downloaded one after another. A failed
    download is retried (the credential is re-read before every attempt); the
    function returns once every selected course has been downloaded.
    """

    all_pluralsight_paths_list = get_all_pluralsight_paths(JSON_FILE_URL)

    if prompt_download_all():
        download_all(all_pluralsight_paths_list)
        return

    user_selection = prompt_paths_and_courses_selection(all_pluralsight_paths_list)

    if user_selection.is_exit:
        return

    # Collect (course link, save directory) pairs for everything that was selected
    if user_selection.selected_course_link:
        # A single course is always selected together with exactly the path it belongs to
        save_directory_path = get_directory_full_path(SAVE_DIRECTORY_PATH, user_selection.selected_paths[0])
        downloads = [(user_selection.selected_course_link, save_directory_path)]
    else:
        downloads = [
            (course_link, get_directory_full_path(SAVE_DIRECTORY_PATH, path))
            for path in user_selection.selected_paths
            for course_link in path.course_links
        ]

    next_index = 0
    while next_index < len(downloads):
        email_address, password = get_credential()
        course_link, save_directory_path = downloads[next_index]

        is_downloaded = download_course(course_link=course_link,
                                        username=email_address,
                                        password=password,
                                        save_directory_path=save_directory_path)

        # Only move on after a successful download; otherwise the same course is retried
        if is_downloaded:
            next_index += 1
|
|
|
|
|
# Run the interactive downloader only when this file is executed as a script
if __name__ == '__main__':
    main()
|
|
|