Browse Source

PARALLEL DOWNLOADING IS HERE BITCHES!

master
parent
commit
91df6b7e1c
1 changed files with 184 additions and 244 deletions
  1. 184
    244
      pluralsight.py

+ 184
- 244
pluralsight.py View File

@@ -4,56 +4,66 @@ import random
import re
import secrets
import string
import sys
import time
from functools import partial
from multiprocessing.pool import Pool
from pathlib import Path
from sys import platform
from typing import Dict, List, Tuple
from typing import List, Tuple, Dict

import click
import requests
import youtube_dl
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait

# region Global Constant(s) and Readonly Variable(s)

# True/False to determine whether selenium instances will be visible or not (headless)
HIDE_SELENIUM_INSTANCES = True

# The maximum number of courses to download from a single account
MAX_COURSE_DOWNLOAD_COUNT = 5
# True/False to determine whether selenium instances will be visible or not (headless)
HIDE_SELENIUM_INSTANCES = False

# Duration of a time.sleep() pause, in seconds
SELENIUM_SLEEP_DURATION = 5
SELENIUM_TIMEOUT_IN_SECONDS = 30

# Maximum parallel course download count
MAX_PARALLEL_COURSE_DOWNLOAD_COUNT = 3

# Maximum number of attempts for a single course download
DOWNLOAD_RETRY_COUNT = 3

# Minimum number of seconds to wait between consecutive video downloads
MIN_VIDEO_DOWNLOAD_DELAY = 1
MIN_VIDEO_DOWNLOAD_DELAY = 10

# Maximum number of seconds to wait between consecutive video downloads
MAX_VIDEO_DOWNLOAD_DELAY = 10
MAX_VIDEO_DOWNLOAD_DELAY = 30

# Check if current OS/platform is Windows
IS_WINDOWS = platform.startswith("win")
IS_WINDOWS = sys.platform.startswith("win")

# Master Directory Path (Default: Working Directory)
MASTER_DIRECTORY = os.path.dirname(os.path.realpath(__file__))

# Path of the text file where pluralsight account details will be stored
CREDENTIAL_FILE_PATH = os.path.join(MASTER_DIRECTORY, "credential.txt")

# Path of the archive text file to log all downloaded videos
ARCHIVE_FILE_PATH = os.path.join(MASTER_DIRECTORY, "archive.txt")

# Progress text file path (Progress file stores progress of download_all())
PROGRESS_FILE_PATH = os.path.join(MASTER_DIRECTORY, "progress.txt")
# Path of the directory where downloaded paths will be saved
SAVE_PATHS_DIRECTORY_PATH = os.path.join(MASTER_DIRECTORY, "Paths")

# Path of the directory where downloaded courses will be saved
SAVE_DIRECTORY_PATH = os.path.join(MASTER_DIRECTORY, "Courses")
# Path of the directory where downloaded independent courses will be saved
SAVE_COURSES_DIRECTORY_PATH = os.path.join(MASTER_DIRECTORY, "Courses")

# JSON File (.json) containing the JSON of the paths/courses dump of Pluralsight
# JSON_FILE_URL = os.path.join(WORKING_DIRECTORY, "pluralsight.json")
JSON_FILE_URL = "https://git.teknik.io/CanWePlsRapeTheShitOuttaPluralsight/RapePluralsight/raw/branch/master/pluralsight.json"
PATHS_JSON_LOCATION = os.path.join(MASTER_DIRECTORY, "pluralsight.json")

# Text File (.txt) containing the independent courses dump of Pluralsight
INDEPENDENT_COURSES_FILE_LOCATION = os.path.join(MASTER_DIRECTORY, "courses.txt")

# Text File (.txt) containing the list of links of the downloaded courses
DOWNLOADED_COURSES_FILE_LOCATION = os.path.join(MASTER_DIRECTORY, "downloaded.txt")

# Options for youtube-dl. For a complete list of options, check https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312
ydl_options = {
@@ -64,10 +74,16 @@ ydl_options = {
'download_archive': ARCHIVE_FILE_PATH
}

# List of links of already downloaded courses
DOWNLOADED_COURSE_LINKS_LIST = []


# endregion


# region Classes


class PluralsightPath(object):
def __init__(self, id: int, link: str, title: str, slug: str, course_links: List[str]):
self.id = id
@@ -77,13 +93,6 @@ class PluralsightPath(object):
self.course_links = course_links


class UserSelection:
    """
    Result of the interactive path/course selection prompt

    The constructor arguments are stored unchanged as attributes of the same name.
    """

    def __init__(self, selected_paths: List[PluralsightPath], selected_course_link: str = "", is_exit: bool = False):
        # Exit flag first, then the optional single-course link, then the chosen paths
        self.is_exit = is_exit
        self.selected_course_link = selected_course_link
        self.selected_paths = selected_paths


class DisposableMail:
"""
This class is used to generate random disposable emails
@@ -123,7 +132,7 @@ class DisposableMail:

response_data = requests.post(post_url, post_data).text

soup = BeautifulSoup(response_data)
soup = BeautifulSoup(response_data, 'html.parser')
for link in soup.findAll('a', href=True):
if "https://app.pluralsight.com/id/forgotpassword/reset?token" in link['href']:
return link['href']
@@ -151,64 +160,55 @@ class Pluralsight:
def __exit__(self, exc_type, exc_val, exc_tb):
    """
    Quit ``self.driver`` when the ``with`` block is left

    Nothing is returned, so an exception raised inside the ``with`` block keeps propagating.
    """
    browser = self.driver
    browser.quit()

@staticmethod
def get_name() -> Tuple[str, str]:
    """
    Fetch a random name from names.drycodes.com and split it into two parts

    The boy-names or the girl-names endpoint is picked at random. The response
    text is stripped of its surrounding quotes/brackets and split on the first
    underscore.

    Returns:
        Tuple[str, str]: (first name, last name). The last name is an empty
        string when the response contains no underscore.
    """

    name_option = random.choice(["boy_names", "girl_names"])
    request_url = f"http://names.drycodes.com/1?nameOptions={name_option}"

    raw_name = requests.get(request_url).text.strip('"[]')

    # partition() always yields exactly one first/last pair, so a caller that
    # unpacks two names cannot fail on an unexpected number of underscores
    first_name, _, last_name = raw_name.partition('_')

    return first_name, last_name

def register(self) -> None:
"""
Registers new Pluralsight account
"""

self.driver.get("https://www.pluralsight.com/offer/2020/free-april-month")
time.sleep(SELENIUM_SLEEP_DURATION)

WebDriverWait(self.driver, SELENIUM_TIMEOUT_IN_SECONDS) \
.until(expected_conditions
.presence_of_element_located((By.CLASS_NAME, "cookie_notification--opt_in")))

accept_cookie_button_element = self.driver.find_element_by_class_name("cookie_notification--opt_in")
accept_cookie_button_element.click()

time.sleep(1)

sign_up_now_button_element = self.driver.find_element_by_xpath('//a[@data-aa-title="Free-April-Start-Now"]')
sign_up_now_button_element.click()

time.sleep(1)

email_input_element = self.driver.find_element_by_name("email")
firstname_input_element = self.driver.find_element_by_name("firstname")
lastname_input_element = self.driver.find_element_by_name("lastname")
tos_checkbox_element = self.driver.find_element_by_name("optInBox")

firstName, lastName = self.get_name()
firstname, lastname = get_name()

email_input_element.send_keys(self.email)
firstname_input_element.send_keys(firstName)
lastname_input_element.send_keys(lastName)
firstname_input_element.send_keys(firstname)
lastname_input_element.send_keys(lastname)
tos_checkbox_element.click()

time.sleep(SELENIUM_SLEEP_DURATION)
WebDriverWait(self.driver, SELENIUM_TIMEOUT_IN_SECONDS) \
.until(expected_conditions
.presence_of_element_located((By.XPATH, "//*[contains(text(), 'I agree, activate benefit')]")))

# time.sleep(SELENIUM_SLEEP_DURATION)

create_account_button_element = self.driver.find_element_by_xpath(
"//*[contains(text(), 'I agree, activate benefit')]")
create_account_button_element.click()

time.sleep(30)
WebDriverWait(self.driver, SELENIUM_TIMEOUT_IN_SECONDS * 4) \
.until(expected_conditions
.presence_of_element_located((By.CLASS_NAME, "cancelButton---CKAut")))

# time.sleep(SELENIUM_SLEEP_DURATION*3)

cancel_button_element = self.driver.find_element_by_class_name("cancelButton---CKAut")
cancel_button_element.click()

time.sleep(SELENIUM_SLEEP_DURATION)
# time.sleep(SELENIUM_SLEEP_DURATION)

def set_password(self, verification_link: str) -> None:
"""
@@ -219,7 +219,12 @@ class Pluralsight:
"""

self.driver.get(verification_link)
time.sleep(SELENIUM_SLEEP_DURATION)

WebDriverWait(self.driver, SELENIUM_TIMEOUT_IN_SECONDS) \
.until(expected_conditions
.presence_of_element_located((By.ID, "Password")))

# time.sleep(SELENIUM_SLEEP_DURATION)

password_input_element = self.driver.find_element_by_id("Password")
password_confirm_input_element = self.driver.find_element_by_id("PasswordConfirmation")
@@ -228,18 +233,35 @@ class Pluralsight:
password_input_element.send_keys(self.password)
password_confirm_input_element.send_keys(self.password)

time.sleep(1)

save_button_element.click()

time.sleep(SELENIUM_SLEEP_DURATION)
WebDriverWait(self.driver, SELENIUM_TIMEOUT_IN_SECONDS) \
.until(expected_conditions
.presence_of_element_located((By.ID, "Password")))

# time.sleep(SELENIUM_SLEEP_DURATION)

class Progress:
    """Progress record holding an id, a course link (initially empty) and an ``is_all`` flag"""

    def __init__(self, id: int, is_all: bool):
        self.id, self.is_all = id, is_all
        # The constructor takes no course link; it always starts out empty
        self.course_link = ""

# endregion


# region Utility Functions


def get_name() -> Tuple[str, str]:
    """
    Fetch a random name from names.drycodes.com and split it into two parts

    The boy-names or the girl-names endpoint is picked at random. The response
    text is stripped of its surrounding quotes/brackets and split on the first
    underscore.

    Returns:
        Tuple[str, str]: (first name, last name). The last name is an empty
        string when the response contains no underscore.
    """

    name_option = random.choice(["boy_names", "girl_names"])
    request_url = f"http://names.drycodes.com/1?nameOptions={name_option}"

    raw_name = requests.get(request_url).text.strip('"[]')

    # partition() always yields exactly one first/last pair, so a caller that
    # unpacks two names cannot fail on an unexpected number of underscores
    first_name, _, last_name = raw_name.partition('_')

    return first_name, last_name


def get_password(min_length: int = 25, max_length: int = 50) -> str:
@@ -259,7 +281,10 @@ def get_password(min_length: int = 25, max_length: int = 50) -> str:
return password


def create_pluralsight_account(credential_file_path: str) -> Dict[str, str]:
# endregion


def create_pluralsight_account() -> Dict[str, str]:
"""
Creates new Pluralsight account and returns the email/password as a dictionary

@@ -270,7 +295,8 @@ def create_pluralsight_account(credential_file_path: str) -> Dict[str, str]:
disposable_email = DisposableMail()
password = get_password()

with Pluralsight(email=disposable_email.email_address, password=password,
with Pluralsight(email=disposable_email.email_address,
password=password,
is_headless=HIDE_SELENIUM_INSTANCES) as ps:
ps.register()

@@ -278,230 +304,144 @@ def create_pluralsight_account(credential_file_path: str) -> Dict[str, str]:

ps.set_password(verification_link=verification_link)

time.sleep(SELENIUM_SLEEP_DURATION)

with open(credential_file_path, 'w+') as account_file:
account_file.write(f"{disposable_email.email_address}\n")
account_file.write(f"{password}\n")

return {'email': disposable_email, 'password': password}


def download_course(course_link: str, username: str, password: str, save_directory_path: str) -> bool:
    """
    Download the given course using the provided credential

    Args:
        course_link: The link of the course to download
        username: Username (Email) of the Pluralsight account to be used for download
        password: Password of the Pluralsight account to be used for download
        save_directory_path: Absolute path of Root save directory

    Returns: True/False bool value denoting the success status of the download
    """

    # Work on a copy so the credentials and output template of this call are
    # not written into the module-level ydl_options shared by other calls
    options = dict(ydl_options)
    options['username'] = username
    options['password'] = password

    # Same template on every platform; only the path separator differs
    separator = "\\" if IS_WINDOWS else "/"
    options['outtmpl'] = separator.join([save_directory_path,
                                         "%(playlist)s",
                                         "%(chapter_number)s - %(chapter)s",
                                         "%(playlist_index)s - %(title)s.%(ext)s"])

    try:
        with youtube_dl.YoutubeDL(options) as ydl:
            ydl.download([course_link])
    except Exception:
        # Ordinary errors are reported as a failed download; KeyboardInterrupt
        # and SystemExit are deliberately left to propagate to the caller
        return False

    return True
return {'email': disposable_email.email_address, 'password': password}


def _read_text_source(location: str) -> str:
    """Return the text behind `location`: fetched with requests when it starts with "http", read from disk otherwise"""
    if location.startswith("http"):
        return requests.get(location).text

    return Path(location).read_text()


def get_all_pluralsight_paths(url: str) -> List[PluralsightPath]:
    """
    Load every Pluralsight path described by a JSON document

    Args:
        url: HTTP(S) URL or local file path of the JSON document. The document is a list of
            objects; each object is passed as keyword arguments to PluralsightPath

    Returns: List of PluralsightPath objects, one per JSON object
    """

    # Honour `url` for local files too instead of always reading JSON_FILE_URL
    path_dicts = json.loads(_read_text_source(url))

    return [PluralsightPath(**path_dict) for path_dict in path_dicts]


def get_paths_and_independent_courses(paths_file: str, courses_file: str) -> Tuple[List[PluralsightPath], List[str]]:
    """
    Load the Pluralsight paths and the list of independent course links

    Args:
        paths_file: HTTP(S) URL or local file path of the paths JSON document
        courses_file: HTTP(S) URL or local file path of a text file with one course link per line

    Returns: Tuple of (list of PluralsightPath objects, list of non-empty course links)
    """

    pluralsight_paths_list = [PluralsightPath(**paths_dict)
                              for paths_dict in json.loads(_read_text_source(paths_file))]

    # The course links come from courses_file, not paths_file. splitlines() also
    # handles Windows line endings; blank lines are dropped
    courses_text = _read_text_source(courses_file)
    courses_list = [line.strip() for line in courses_text.splitlines() if line.strip()]

    return pluralsight_paths_list, courses_list


def print_pluralsight_paths_and_courses(pluralsight_paths_list: List[PluralsightPath],
                                        search_query: str) -> List[PluralsightPath]:
    """
    Print every path whose title contains the search query, together with its course links

    The title match is case-insensitive.

    Args:
        pluralsight_paths_list: The paths to search through
        search_query: Text that must occur in a path title

    Returns: The matching paths, in their original order
    """

    needle = search_query.lower()
    queried_paths_list = []

    for pluralsight_path in pluralsight_paths_list:
        if needle not in pluralsight_path.title.lower():
            continue

        print(f"{pluralsight_path.id} | {pluralsight_path.title}")

        # Serial 0 is the "all courses" entry, so the real courses are numbered from 1
        print("\t0 - [DOWNLOAD ALL COURSES]")
        for course_serial, course_link in enumerate(pluralsight_path.course_links, start=1):
            print(f"\t{course_serial} - {course_link}")

        queried_paths_list.append(pluralsight_path)

    print("0 | [DOWNLOAD ALL PATHS]")

    return queried_paths_list


def get_directory_full_path(root_directory_path: str, pluralsight_path: PluralsightPath) -> str:
    """
    Build the save directory path of a Pluralsight path

    The directory name is the path id zero-padded to three digits, followed by
    the slug on Windows or the title on every other platform.

    Args:
        root_directory_path: Directory under which the path directory is placed
        pluralsight_path: The Pluralsight path to build the directory name for

    Returns: root_directory_path joined with the directory name
    """

    # NOTE(review): the slug is presumably used on Windows because titles can contain
    # characters that are invalid in Windows file names - confirm
    label = pluralsight_path.slug if IS_WINDOWS else pluralsight_path.title
    directory_name = f"{pluralsight_path.id:03d} - {label}"

    return os.path.join(root_directory_path, directory_name)


def download_pluralsight_paths(pluralsight_paths=List[PluralsightPath]):
    """Placeholder without an implementation"""
    # NOTE(review): `=` makes the typing object the parameter's default value; an
    # annotation (`:`) was probably intended - confirm before changing the signature
    pass


def download_single_course(course_link: str, username: str, password: str, save_directory_path: str) -> bool:
    """
    Download the given course using the provided credential

    A course whose link is already in DOWNLOADED_COURSE_LINKS_LIST is skipped and
    reported as successful. A failing download is attempted up to
    DOWNLOAD_RETRY_COUNT times in total.

    Args:
        course_link: The link of the course to download
        username: Username (Email) of the Pluralsight account to be used for download
        password: Password of the Pluralsight account to be used for download
        save_directory_path: Absolute path of Root save directory

    Returns: True/False bool value denoting the success status of the download
    """

    if course_link in DOWNLOADED_COURSE_LINKS_LIST:
        return True

    # Work on a copy so the credentials and output template of this call are
    # not written into the module-level ydl_options shared by other calls
    options = dict(ydl_options)
    options['username'] = username
    options['password'] = password

    # Same template on every platform; only the path separator differs
    separator = "\\" if IS_WINDOWS else "/"
    options['outtmpl'] = separator.join([save_directory_path,
                                         "%(playlist)s",
                                         "%(chapter_number)s - %(chapter)s",
                                         "%(playlist_index)s - %(title)s.%(ext)s"])

    retry_count = 0
    while retry_count < DOWNLOAD_RETRY_COUNT:
        try:
            with youtube_dl.YoutubeDL(options) as ydl:
                ydl.download([course_link])

            DOWNLOADED_COURSE_LINKS_LIST.append(course_link)
            return True
        except KeyboardInterrupt:
            # An interrupt stops the retries and is reported as a failed download
            break
        except Exception:
            # Only ordinary errors count as a failed attempt; SystemExit propagates
            retry_count += 1

    return False


def download_batch_course(course_links_list: List[str], username: str, password: str, save_directory_path: str) -> bool:
    """
    Download several courses in parallel with the same credential

    At most MAX_PARALLEL_COURSE_DOWNLOAD_COUNT courses are downloaded at a time.

    Args:
        course_links_list: The links of the courses to download
        username: Username (Email) of the Pluralsight account to be used for download
        password: Password of the Pluralsight account to be used for download
        save_directory_path: Absolute path of Root save directory

    Returns: True when every course downloaded successfully (also for an empty list), False otherwise
    """

    download_single_course_function = partial(download_single_course,
                                              username=username,
                                              password=password,
                                              save_directory_path=save_directory_path)

    # NOTE(review): the pool workers are separate processes, so links that
    # download_single_course appends to DOWNLOADED_COURSE_LINKS_LIST there presumably
    # never reach this process's list - verify
    with Pool(MAX_PARALLEL_COURSE_DOWNLOAD_COUNT) as pool:
        download_results = pool.map(download_single_course_function, course_links_list)

    # Success means no download failed; `False in download_results` reported the opposite
    return all(download_results)


# region Click Prompts

def prompt_download_all() -> bool:
    """Ask the user whether everything should be downloaded (the default answer is False)"""
    is_download_all = click.prompt("Download All? ", default=False, show_default=True, type=bool, prompt_suffix="")
    return is_download_all


def prompt_paths_and_courses_selection(all_pluralsight_paths_list: List[PluralsightPath]) -> UserSelection:
    """
    Interactively ask for a search query, then a path id, then a course number

    Answering 0 for the path selects every path that matched the query; answering 0
    for the course selects the whole chosen path. When any step raises, the error
    is printed and the user is asked whether to exit; otherwise the prompt starts over.

    Args:
        all_pluralsight_paths_list: Every known Pluralsight path

    Returns: The UserSelection describing the chosen paths/course, or one flagged is_exit
    """

    while True:
        try:
            search_query = click.prompt("Search Query", type=str)

            queried_paths = print_pluralsight_paths_and_courses(all_pluralsight_paths_list, search_query)

            selected_paths = queried_paths
            while True:
                path_id = click.prompt("Select Path (by id)", type=int, default=0, show_default=True)

                if path_id == 0:
                    return UserSelection(selected_paths=queried_paths)

                for path in queried_paths:
                    if path_id == path.id:
                        selected_paths = [path]
                        break
                else:
                    # No queried path has this id: ask again
                    continue
                break

            while True:
                course_id = click.prompt("Select Course (by id)", type=int, default=0, show_default=True)
                if 0 <= course_id <= len(selected_paths[0].course_links):
                    break

            if course_id == 0:
                return UserSelection(selected_paths=selected_paths)
            # Course numbers shown to the user start at 1, list indexes at 0
            return UserSelection(selected_paths, selected_paths[0].course_links[course_id - 1])

        except Exception as e:
            print(e)
            if click.prompt("Exit", default=False, show_default=True, type=bool):
                return UserSelection([], is_exit=True)

# endregion

def download_pluralsight_path(pluralsight_paths_list: List[PluralsightPath], path_id: int) -> None:
    """
    Download every course of the Pluralsight path with the given id

    Args:
        pluralsight_paths_list: The known Pluralsight paths
        path_id: The id of the path to download
    """

    matching_paths = [path for path in pluralsight_paths_list if path.id == path_id]
    if not matching_paths:
        # Nothing to download, so no account is created either
        return

    credential_dict = create_pluralsight_account()

    for pluralsight_path in matching_paths:
        save_directory_path = get_directory_full_path(SAVE_PATHS_DIRECTORY_PATH, pluralsight_path)

        download_batch_course(course_links_list=pluralsight_path.course_links,
                              username=credential_dict['email'],
                              password=credential_dict['password'],
                              save_directory_path=save_directory_path)


def get_credential() -> Tuple[str, str]:
    """
    Read the account credential from CREDENTIAL_FILE_PATH, creating an account first if the file is missing

    Returns: Tuple of (first line, second line) of the credential file, with trailing whitespace removed
    """

    if not os.path.exists(CREDENTIAL_FILE_PATH):
        print("CREATING NEW PLURALSIGHT ACCOUNT")
        create_pluralsight_account(CREDENTIAL_FILE_PATH)
        print("SUCCESS! NEW PLURALSIGHT ACCOUNT CREATED.")

    with open(CREDENTIAL_FILE_PATH, 'r') as account_file:
        lines = account_file.readlines()
        credential = lines[0].rstrip(), lines[1].rstrip()

    return credential


def download_independent_courses(course_list: List[str]) -> None:
    """
    Download the given independent courses into SAVE_COURSES_DIRECTORY_PATH

    Args:
        course_list: The links of the courses to download
    """

    credential_dict = create_pluralsight_account()

    download_batch_course(course_links_list=course_list,
                          username=credential_dict['email'],
                          password=credential_dict['password'],
                          save_directory_path=SAVE_COURSES_DIRECTORY_PATH)


def save_progress(path_id: int, course_index: int):
    """
    Overwrite PROGRESS_FILE_PATH with "<path_id>|<course_index>"

    Args:
        path_id: The id of the path being downloaded
        course_index: The index of the course within that path
    """

    with open(PROGRESS_FILE_PATH, 'w+') as progress_file:
        progress_file.write(f"{path_id}|{course_index}")


def download_all(all_pluralsight_paths: List[PluralsightPath]):
global SAVE_DIRECTORY_PATH
def main():
global DOWNLOADED_COURSE_LINKS_LIST

while True:
try:
if not os.path.isfile(PROGRESS_FILE_PATH):
save_progress(path_id=1, course_index=0)

current_path_id, current_course_index = map(int, Path(PROGRESS_FILE_PATH).read_text().rstrip().split("|"))

while current_path_id <= len(all_pluralsight_paths):
email, password = get_credential()

for pluralsight_path in all_pluralsight_paths:
if int(current_path_id) == pluralsight_path.id:
while current_course_index < len(pluralsight_path.course_links):
save_directory_path = get_directory_full_path(SAVE_DIRECTORY_PATH, pluralsight_path)

course_link = pluralsight_path.course_links[current_course_index]
download_result = download_course(course_link, email, password, save_directory_path)
if os.path.isfile(DOWNLOADED_COURSES_FILE_LOCATION):
DOWNLOADED_COURSE_LINKS_LIST = list(
filter(None, Path(DOWNLOADED_COURSES_FILE_LOCATION).read_text().split('\n')))

if not download_result:
raise Exception("Failed to download course")
paths_list, individual_courses_list = get_paths_and_independent_courses(PATHS_JSON_LOCATION,
INDEPENDENT_COURSES_FILE_LOCATION)

current_course_index += 1
save_progress(current_path_id, current_course_index)
if len(sys.argv) == 1:
download_independent_courses(individual_courses_list)
else:
for arg in sys.argv[1:]:
if 1 <= int(arg) <= len(paths_list):
download_pluralsight_path(paths_list, int(arg))

current_path_id += 1
current_course_index = 0
save_progress(current_path_id, current_course_index)

break

except(KeyboardInterrupt, SystemExit):
print("EXITING PROGRAM")
break
except Exception as exception:
os.remove(CREDENTIAL_FILE_PATH)
print(exception)
retry = input("Retry (Y/y for yes): ").lower()


def main():
global SAVE_DIRECTORY_PATH

all_pluralsight_paths_list = get_all_pluralsight_paths(JSON_FILE_URL)

if prompt_download_all():
download_all(all_pluralsight_paths_list)
return

user_selection = prompt_paths_and_courses_selection(all_pluralsight_paths_list)

if user_selection.is_exit:
return

while True:
email_address, password = get_credential()

if user_selection.selected_course_link:
download_course(course_link=user_selection.selected_course_link,
username=email_address,
password=password,
save_directory_path=SAVE_DIRECTORY_PATH)
else:
for path in user_selection.selected_paths:
save_directory_path = get_directory_full_path(SAVE_DIRECTORY_PATH, path)
for course_link in path.course_links:
download_course(course_link=course_link,
username=email_address,
password=password,
save_directory_path=save_directory_path)
if retry != 'y':
break
finally:
with open(DOWNLOADED_COURSES_FILE_LOCATION, 'w+') as downloaded_file:
downloaded_file.write('\n'.join(set(DOWNLOADED_COURSE_LINKS_LIST)))


if __name__ == '__main__':

Loading…
Cancel
Save