You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

500 lines
17 KiB

import json
import os
import random
import re
import secrets
import string
import time
from pathlib import Path
from sys import platform
from typing import Dict, List, Tuple
import click
import requests
import youtube_dl
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
# region Global Constant(s) and Readonly Variable(s)

# When True the Selenium browser runs headless; when False its window is visible
HIDE_SELENIUM_INSTANCES = False

# Upper limit on how many courses should be downloaded with a single account
MAX_COURSE_DOWNLOAD_COUNT = 5

# Number of seconds passed to time.sleep() between Selenium steps
SELENIUM_SLEEP_DURATION = 5

# Minimum number of seconds to wait between consecutive video downloads
MIN_VIDEO_DOWNLOAD_DELAY = 1

# Maximum number of seconds to wait between consecutive video downloads
MAX_VIDEO_DOWNLOAD_DELAY = 10

# True when the current OS/platform string starts with "win"
IS_WINDOWS = platform.startswith("win")

# Master directory: the directory that contains this script file
MASTER_DIRECTORY = os.path.dirname(os.path.realpath(__file__))

# Text file where the Pluralsight account details are stored
CREDENTIAL_FILE_PATH = os.path.join(MASTER_DIRECTORY, "credential.txt")

# Archive text file handed to youtube-dl to log all downloaded videos
ARCHIVE_FILE_PATH = os.path.join(MASTER_DIRECTORY, "archive.txt")

# Progress text file (stores the progress of download_all())
PROGRESS_FILE_PATH = os.path.join(MASTER_DIRECTORY, "progress.txt")

# Directory where downloaded courses are saved
SAVE_DIRECTORY_PATH = os.path.join(MASTER_DIRECTORY, "Courses")

# Location of the JSON dump of Pluralsight paths/courses
# (a local copy could be used instead of the remote file, e.g. a "pluralsight.json" path)
JSON_FILE_URL = "https://git.teknik.io/CanWePlsRapeTheShitOuttaPluralsight/RapePluralsight/raw/branch/master/pluralsight.json"

# Options for youtube-dl. For a complete list of options, check
# https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312
ydl_options = dict(
    writesubtitles=True,
    nooverwrites=True,
    sleep_interval=MIN_VIDEO_DOWNLOAD_DELAY,
    max_sleep_interval=MAX_VIDEO_DOWNLOAD_DELAY,
    download_archive=ARCHIVE_FILE_PATH,
)
# endregion
class PluralsightPath:
    """
    Plain record describing one Pluralsight path
    The constructor stores every argument unchanged on the instance:
    id, link, title, slug and course_links (the list of course links of the path)
    """

    def __init__(self, id: int, link: str, title: str, slug: str, course_links: List[str]):
        self.id, self.link = id, link
        self.title, self.slug = title, slug
        self.course_links = course_links
class UserSelection:
    """
    Result of the interactive path/course selection prompt
    Holds the selected paths, an optional single course link (empty string when
    no single course was picked) and a flag telling whether the user chose to exit
    """

    def __init__(self, selected_paths: List[PluralsightPath], selected_course_link: str = "", is_exit: bool = False):
        self.selected_paths, self.selected_course_link, self.is_exit = (
            selected_paths,
            selected_course_link,
            is_exit,
        )
class DisposableMail:
    """
    This class is used to generate random disposable emails

    The address is requested from gmailnator.com when the object is created and
    the same service is then queried for the inbox content.
    """

    def __init__(self):
        # The response body of the "GenerateEmail" action is stored as the email address
        self.email_address = requests.post("https://gmailnator.com/index/indexquery", {'action': 'GenerateEmail'}).text

    def get_mail_item_id(self) -> str:
        """
        Polls the mailbox once per second until a mail item id can be extracted
        Returns:
            str: The id captured from the first mail list entry
        """
        post_url = "https://gmailnator.com/mailbox/mailboxquery"
        post_data = {
            'action': 'LoadMailList',
            'Email_address': self.email_address
        }
        while True:
            time.sleep(1)
            try:
                response_text = requests.post(post_url, post_data).json()[0]['content']
                # The id is the text between '#' and the closing '">' of the entry
                return re.findall('#(.*)\\">', response_text)[0]
            except (requests.RequestException, ValueError, LookupError, TypeError):
                # Request failed, or the mailbox is still empty / not in the expected shape: poll again.
                # Only these errors are retried so that Ctrl+C can still stop the loop.
                continue

    def get_verification_link(self) -> str:
        """
        Loads the received message and extracts the Pluralsight password reset link
        Returns:
            str: The first link in the message pointing to the password reset page
        Raises:
            RuntimeError: If the message contains no such link
        """
        post_url = "https://gmailnator.com/mailbox/get_single_message/"
        post_data = {
            'action': 'LoadMailList',
            'message_id': self.get_mail_item_id(),
            # Only the part of the address before the first '+' is sent
            'email': self.email_address.split("+")[0]
        }
        response_data = requests.post(post_url, post_data).text
        # Name the parser explicitly so the result does not depend on which parsers are installed
        soup = BeautifulSoup(response_data, "html.parser")
        for link in soup.find_all('a', href=True):
            if "https://app.pluralsight.com/id/forgotpassword/reset?token" in link['href']:
                return link['href']
        raise RuntimeError("No Pluralsight verification link found in the received mail")
class Pluralsight:
    """
    This class handles the registration and verification of new Pluralsight accounts

    It owns a Selenium Firefox driver and can be used as a context manager;
    the driver is quit when the context is left.
    """

    def __init__(self, email: str, password: str, is_headless: bool = True):
        """
        Starts a Firefox driver and stores the account details
        Args:
            email: Email address used for the registration
            password: Password to set for the account
            is_headless: When True Firefox is started with the "--headless" argument
        """
        if is_headless:
            options = Options()
            options.add_argument("--headless")
            self.driver = webdriver.Firefox(options=options)
        else:
            self.driver = webdriver.Firefox()
        self.email = email
        self.password = password

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always close the browser, whether or not an exception occurred
        self.driver.quit()

    @staticmethod
    def get_name() -> Tuple[str, str]:
        """
        Fetches a random name from names.drycodes.com and splits it into two parts
        Returns:
            Tuple[str, str]: (first name, last name)
        """
        request_url = "http://names.drycodes.com/1?nameOptions=boy_names" if random.choice([True, False]) \
            else "http://names.drycodes.com/1?nameOptions=girl_names"
        # The surrounding quotes/brackets are stripped; presumably the body looks like ["First_Last"]
        full_name = requests.get(request_url).text.strip('"[]')
        # Split on the first underscore only so exactly two parts always come back
        first_name, _, last_name = full_name.partition('_')
        # Without an underscore reuse the first part, so the last name field is never empty
        return first_name, last_name or first_name

    def register(self) -> None:
        """
        Registers new Pluralsight account
        """
        self.driver.get("https://www.pluralsight.com/offer/2020/free-april-month")
        time.sleep(SELENIUM_SLEEP_DURATION)
        accept_cookie_button_element = self.driver.find_element_by_class_name("cookie_notification--opt_in")
        accept_cookie_button_element.click()
        time.sleep(1)
        sign_up_now_button_element = self.driver.find_element_by_xpath('//a[@data-aa-title="Free-April-Start-Now"]')
        sign_up_now_button_element.click()
        time.sleep(1)
        email_input_element = self.driver.find_element_by_name("email")
        firstname_input_element = self.driver.find_element_by_name("firstname")
        lastname_input_element = self.driver.find_element_by_name("lastname")
        tos_checkbox_element = self.driver.find_element_by_name("optInBox")
        first_name, last_name = self.get_name()
        email_input_element.send_keys(self.email)
        firstname_input_element.send_keys(first_name)
        lastname_input_element.send_keys(last_name)
        tos_checkbox_element.click()
        time.sleep(SELENIUM_SLEEP_DURATION)
        create_account_button_element = self.driver.find_element_by_xpath(
            "//*[contains(text(), 'I agree, activate benefit')]")
        create_account_button_element.click()
        # NOTE(review): fixed 30 second wait, presumably for the account activation page - confirm
        time.sleep(30)
        cancel_button_element = self.driver.find_element_by_class_name("cancelButton---CKAut")
        cancel_button_element.click()
        time.sleep(SELENIUM_SLEEP_DURATION)

    def set_password(self, verification_link: str) -> None:
        """
        Sets password in the given verification link
        Args:
            verification_link: The verification link (as string) to set up password
        """
        self.driver.get(verification_link)
        time.sleep(SELENIUM_SLEEP_DURATION)
        password_input_element = self.driver.find_element_by_id("Password")
        password_confirm_input_element = self.driver.find_element_by_id("PasswordConfirmation")
        save_button_element = self.driver.find_element_by_class_name("psds-button--appearance-primary")
        password_input_element.send_keys(self.password)
        password_confirm_input_element.send_keys(self.password)
        time.sleep(1)
        save_button_element.click()
        time.sleep(SELENIUM_SLEEP_DURATION)
class Progress:
    """
    Small record holding an id, a course link (always starts as an empty string)
    and an is_all flag
    """

    def __init__(self, id: int, is_all: bool):
        self.id, self.course_link, self.is_all = id, "", is_all
def get_password(min_length: int = 25, max_length: int = 50) -> str:
    """
    Generates a random password using ascii letters and numerical digits
    Args:
        min_length: Minimum length of the password, default is 25
        max_length: Maximum length of the password, default is 50
    Returns: Generated password as string
    Raises:
        ValueError: If min_length is greater than max_length
    """
    # Draw the length from the same cryptographically secure source as the characters
    length = min_length + secrets.randbelow(max_length - min_length + 1)
    alphabet = string.ascii_letters + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(length))
def create_pluralsight_account(credential_file_path: str) -> Dict[str, str]:
    """
    Creates new Pluralsight account and returns the email/password as a dictionary
    Args:
        credential_file_path: Path of the text file the new email and password are written to
            (email on the first line, password on the second)
    Returns:
        Dict[str, str]: Dictionary containing email and password with eponymous pair keys
    """
    disposable_email = DisposableMail()
    password = get_password()
    with Pluralsight(email=disposable_email.email_address, password=password,
                     is_headless=HIDE_SELENIUM_INSTANCES) as ps:
        ps.register()
        verification_link = disposable_email.get_verification_link()
        ps.set_password(verification_link=verification_link)
        time.sleep(SELENIUM_SLEEP_DURATION)
    with open(credential_file_path, 'w+') as account_file:
        account_file.write(f"{disposable_email.email_address}\n")
        account_file.write(f"{password}\n")
    # Return the address string, not the DisposableMail object, to honour Dict[str, str]
    return {'email': disposable_email.email_address, 'password': password}
def download_course(course_link: str, username: str, password: str, save_directory_path: str) -> bool:
    """
    Download the given course using the provided credential
    Args:
        course_link: The link of the course to download
        username: Username (Email) of the Pluralsight account to be used for download
        password: Password of the Pluralsight account to be used for download
        save_directory_path: Absolute path of Root save directory
    Returns: True/False bool value denoting the success status of the download
    """
    # Work on a per-call copy so credentials and the output template are not
    # left behind in the shared module-level options
    options = dict(ydl_options)
    options['username'] = username
    options['password'] = password
    # os.path.join picks the separator of the current platform
    options['outtmpl'] = os.path.join(
        save_directory_path,
        "%(playlist)s",
        "%(chapter_number)s - %(chapter)s",
        "%(playlist_index)s - %(title)s.%(ext)s",
    )
    try:
        with youtube_dl.YoutubeDL(options) as ydl:
            ydl.download([course_link])
    except Exception as error:
        # Report why the download failed; KeyboardInterrupt/SystemExit are not swallowed
        print(f"Failed to download {course_link}: {error}")
        return False
    return True
def get_all_pluralsight_paths(url: str) -> List[PluralsightPath]:
    """
    Loads the JSON dump of Pluralsight paths and converts it to PluralsightPath objects
    Args:
        url: Either a link starting with "http" (fetched with requests) or the
            path of a local JSON file
    Returns:
        List[PluralsightPath]: One object per dictionary in the JSON array; every
        dictionary is passed to the PluralsightPath constructor as keyword arguments
    """
    if url.startswith("http"):
        response = requests.get(url)
        # Fail on an HTTP error status instead of trying to parse an error page as JSON
        response.raise_for_status()
        json_string = response.text
    else:
        # Read the file that was actually requested, not the module-level default
        json_string = Path(url).read_text()
    path_dicts = json.loads(json_string)
    return [PluralsightPath(**path_dict) for path_dict in path_dicts]
def print_pluralsight_paths_and_courses(pluralsight_paths_list: List[PluralsightPath],
                                        search_query: str) -> List[PluralsightPath]:
    """
    Prints every path whose title contains the search query (case-insensitive)
    together with a numbered list of its course links
    Args:
        pluralsight_paths_list: The paths to search through
        search_query: Text that must occur in the title of a path
    Returns:
        List[PluralsightPath]: The paths that matched, in their original order
    """
    matching_paths = []
    for candidate in pluralsight_paths_list:
        if search_query.lower() in candidate.title.lower():
            print(f"{candidate.id} | {candidate.title}")
            # Serial 0 is reserved for "all courses of this path"
            print("\t0 - [DOWNLOAD ALL COURSES]")
            for serial, link in enumerate(candidate.course_links, start=1):
                print(f"\t{serial} - {link}")
            matching_paths.append(candidate)
    # Id 0 is reserved for "all listed paths"
    print("0 | [DOWNLOAD ALL PATHS]")
    return matching_paths
def get_directory_full_path(root_directory_path: str, pluralsight_path: PluralsightPath) -> str:
    """
    Builds the save directory of a path below the given root directory
    The directory name is the zero-padded (3 digits) path id followed by " - " and
    the slug of the path on Windows or its title on every other platform
    """
    numbered_prefix = f"{pluralsight_path.id:03d} - "
    if IS_WINDOWS:
        label = pluralsight_path.slug
    else:
        label = pluralsight_path.title
    return os.path.join(root_directory_path, numbered_prefix + label)
def download_pluralsight_paths(pluralsight_paths: List[PluralsightPath] = None):
    """
    Placeholder for downloading the given paths; currently does nothing
    Args:
        pluralsight_paths: The paths to download. The type is now an annotation
            rather than the default value; the None default keeps calls without
            arguments working
    """
    pass
# region Click Prompts
def prompt_download_all() -> bool:
    """
    Asks the user whether everything should be downloaded
    Returns: The boolean answer of the user (False when the default is accepted)
    """
    return click.prompt("Download All? ", type=bool, default=False, show_default=True, prompt_suffix="")
def prompt_paths_and_courses_selection(all_pluralsight_paths_list: List[PluralsightPath]) -> UserSelection:
    """
    Interactively lets the user search for paths and pick a path and a course
    Args:
        all_pluralsight_paths_list: All known paths to search in
    Returns:
        UserSelection: All queried paths when path id 0 is entered, the chosen path
        when course id 0 is entered, the chosen path plus one course link otherwise,
        or an empty selection flagged as exit when the user exits after an error
    """
    while True:
        try:
            query = click.prompt("Search Query", type=str)
            matches = print_pluralsight_paths_and_courses(all_pluralsight_paths_list, query)

            # Keep asking until the id is 0 (everything listed) or belongs to a listed path
            chosen_path = None
            while chosen_path is None:
                path_id = click.prompt("Select Path (by id)", type=int, default=0, show_default=True)
                if path_id == 0:
                    return UserSelection(selected_paths=matches)
                chosen_path = next((candidate for candidate in matches if path_id == candidate.id), None)
            chosen_paths = [chosen_path]

            # Keep asking until the course id is within 0..number of courses
            course_id = -1
            while not 0 <= course_id <= len(chosen_paths[0].course_links):
                course_id = click.prompt("Select Course (by id)", type=int, default=0, show_default=True)

            if course_id == 0:
                return UserSelection(selected_paths=chosen_paths)
            # Course ids shown to the user are 1-based
            return UserSelection(chosen_paths, chosen_paths[0].course_links[course_id - 1])
        except Exception as error:
            print(error)
            wants_exit = click.prompt("Exit", default=False, show_default=True, type=bool)
            if wants_exit:
                return UserSelection([], is_exit=True)
# endregion
def get_credential() -> Tuple[str, str]:
    """
    Reads the stored Pluralsight credential, creating a new account first when
    the credential file does not exist
    Returns:
        Tuple[str, str]: The first and second line of the credential file
        (email, password) with trailing whitespace removed
    """
    credential_missing = not os.path.exists(CREDENTIAL_FILE_PATH)
    if credential_missing:
        print("CREATING NEW PLURALSIGHT ACCOUNT")
        create_pluralsight_account(CREDENTIAL_FILE_PATH)
        print("SUCCESS! NEW PLURALSIGHT ACCOUNT CREATED.")
    with open(CREDENTIAL_FILE_PATH, 'r') as account_file:
        stored_lines = list(account_file)
    email_line, password_line = stored_lines[0], stored_lines[1]
    return email_line.rstrip(), password_line.rstrip()
def save_progress(path_id: int, course_index: int):
    """
    Overwrites the progress file with "<path_id>|<course_index>"
    Args:
        path_id: Id of the path currently being downloaded
        course_index: Index of the next course of that path
    """
    record = f"{path_id}|{course_index}"
    with open(PROGRESS_FILE_PATH, 'w+') as progress_file:
        progress_file.write(record)
def download_all(all_pluralsight_paths: List[PluralsightPath]):
    """
    Downloads every course of every path, resuming from the progress file
    The progress file holds "<path id>|<course index>"; it is created with
    "1|0" when missing and rewritten after every finished course and path.
    Any error (including a failed course download) is printed and stops the run.
    Args:
        all_pluralsight_paths: All paths to download
    """
    try:
        if not os.path.isfile(PROGRESS_FILE_PATH):
            save_progress(path_id=1, course_index=0)
        current_path_id, current_course_index = map(int, Path(PROGRESS_FILE_PATH).read_text().rstrip().split("|"))
        while current_path_id <= len(all_pluralsight_paths):
            # First path carrying the current id, or None when that id does not exist
            pluralsight_path = next(
                (path for path in all_pluralsight_paths if path.id == current_path_id), None)
            if pluralsight_path is not None:
                email, password = get_credential()
                save_directory_path = get_directory_full_path(SAVE_DIRECTORY_PATH, pluralsight_path)
                while current_course_index < len(pluralsight_path.course_links):
                    course_link = pluralsight_path.course_links[current_course_index]
                    if not download_course(course_link, email, password, save_directory_path):
                        raise Exception("Failed to download course")
                    current_course_index += 1
                    save_progress(current_path_id, current_course_index)
            # Always move on to the next id, also when no path carries the current
            # one; otherwise a gap in the ids would make this loop spin forever
            current_path_id += 1
            current_course_index = 0
            save_progress(current_path_id, current_course_index)
    except Exception as exception:
        print(exception)
def main():
    """
    Entry point of the script
    Loads the list of paths, then either downloads everything or lets the user
    pick paths / a single course and downloads that selection. The selection is
    retried until every download reports success.
    """
    all_pluralsight_paths_list = get_all_pluralsight_paths(JSON_FILE_URL)
    if prompt_download_all():
        download_all(all_pluralsight_paths_list)
        return
    user_selection = prompt_paths_and_courses_selection(all_pluralsight_paths_list)
    if user_selection.is_exit:
        return
    while True:
        email_address, password = get_credential()
        if user_selection.selected_course_link:
            # download_course requires a save directory: use the directory of the
            # selected path, falling back to the root save directory
            if user_selection.selected_paths:
                save_directory_path = get_directory_full_path(SAVE_DIRECTORY_PATH,
                                                              user_selection.selected_paths[0])
            else:
                save_directory_path = SAVE_DIRECTORY_PATH
            all_succeeded = download_course(course_link=user_selection.selected_course_link,
                                            username=email_address,
                                            password=password,
                                            save_directory_path=save_directory_path)
        else:
            all_succeeded = True
            for path in user_selection.selected_paths:
                save_directory_path = get_directory_full_path(SAVE_DIRECTORY_PATH, path)
                for course_link in path.course_links:
                    if not download_course(course_link=course_link,
                                           username=email_address,
                                           password=password,
                                           save_directory_path=save_directory_path):
                        all_succeeded = False
        # Stop once everything was downloaded; failed downloads are retried
        if all_succeeded:
            break
# Run the downloader only when this file is executed as a script
if "__main__" == __name__:
    main()