You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

pluralsight.py 17KB


  1. import json
  2. import os
  3. import random
  4. import re
  5. import secrets
  6. import string
  7. import time
  8. from pathlib import Path
  9. from sys import platform
  10. from typing import Dict, List, Tuple
  11. import click
  12. import requests
  13. import youtube_dl
  14. from bs4 import BeautifulSoup
  15. from selenium import webdriver
  16. from selenium.webdriver.firefox.options import Options
  17. # region Global Constant(s) and Readonly Variable(s)
  18. # True/False to determine whether selenium instances will be visible or not (headless)
  19. HIDE_SELENIUM_INSTANCES = True
  20. # The maximum number of courses to download from a single account
  21. MAX_COURSE_DOWNLOAD_COUNT = 5
  22. # Denotes Time.Sleep() duration in seconds
  23. SELENIUM_SLEEP_DURATION = 5
  24. # Minimum number of seconds to wait between consecutive video downloads
  25. MIN_VIDEO_DOWNLOAD_DELAY = 1
  26. # Minimum number of seconds to wait between consecutive video downloads
  27. MAX_VIDEO_DOWNLOAD_DELAY = 10
  28. # Check if current OS/platform is Windows
  29. IS_WINDOWS = platform.startswith("win")
  30. # Master Directory Path (Default: Working Directory)
  31. MASTER_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
  32. # Path of the text file where pluralsight account details will be stored
  33. CREDENTIAL_FILE_PATH = os.path.join(MASTER_DIRECTORY, "credential.txt")
  34. # Path of the archive text file to log all downloaded videos
  35. ARCHIVE_FILE_PATH = os.path.join(MASTER_DIRECTORY, "archive.txt")
  36. # Progress text file path (Progress file stores progress of download_all())
  37. PROGRESS_FILE_PATH = os.path.join(MASTER_DIRECTORY, "progress.txt")
  38. # Path of the directory where downloaded courses will be saved
  39. SAVE_DIRECTORY_PATH = os.path.join(MASTER_DIRECTORY, "Courses")
  40. # JSON File (.json) containing the JSON of the paths/courses dump of Pluralsight
  41. JSON_FILE_URL = os.path.join(MASTER_DIRECTORY, "pluralsight.json")
  42. # Options for youtube-dl. For a complete list of options, check https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312
  43. ydl_options = {
  44. 'writesubtitles': True,
  45. 'nooverwrites': True,
  46. 'sleep_interval': MIN_VIDEO_DOWNLOAD_DELAY,
  47. 'max_sleep_interval': MAX_VIDEO_DOWNLOAD_DELAY,
  48. 'download_archive': ARCHIVE_FILE_PATH
  49. }
  50. # endregion
  51. class PluralsightPath(object):
  52. def __init__(self, id: int, link: str, title: str, slug: str, course_links: List[str]):
  53. self.id = id
  54. self.link = link
  55. self.title = title
  56. self.slug = slug
  57. self.course_links = course_links
  58. class UserSelection:
  59. def __init__(self, selected_paths: List[PluralsightPath], selected_course_link: str = "", is_exit: bool = False):
  60. self.selected_paths = selected_paths
  61. self.selected_course_link = selected_course_link
  62. self.is_exit = is_exit
  63. class DisposableMail:
  64. """
  65. This class is used to generate random disposable emails
  66. """
  67. def __init__(self):
  68. self.email_address = requests.post("https://gmailnator.com/index/indexquery", {'action': 'GenerateEmail'}).text
  69. def get_mail_item_id(self) -> object:
  70. post_url = "https://gmailnator.com/mailbox/mailboxquery"
  71. post_data = {
  72. 'action': 'LoadMailList',
  73. 'Email_address': self.email_address
  74. }
  75. while True:
  76. try:
  77. time.sleep(1)
  78. response_text = requests.post(post_url, post_data).json()[0]['content']
  79. result = re.findall('#(.*)\\">', response_text)
  80. mail_id = result[0]
  81. return mail_id
  82. except:
  83. pass
  84. def get_verification_link(self) -> str:
  85. post_url = "https://gmailnator.com/mailbox/get_single_message/"
  86. post_data = {
  87. 'action': 'LoadMailList',
  88. 'message_id': self.get_mail_item_id(),
  89. 'email': self.email_address.split("+")[0]
  90. }
  91. response_data = requests.post(post_url, post_data).text
  92. soup = BeautifulSoup(response_data)
  93. for link in soup.findAll('a', href=True):
  94. if "https://app.pluralsight.com/id/forgotpassword/reset?token" in link['href']:
  95. return link['href']
  96. class Pluralsight:
  97. """
  98. This class handles the registration and verification of new Pluralsight accounts
  99. """
  100. def __init__(self, email: str, password: str, is_headless: bool = True):
  101. if is_headless:
  102. options = Options()
  103. options.add_argument("--headless")
  104. self.driver = webdriver.Firefox(options=options)
  105. else:
  106. self.driver = webdriver.Firefox()
  107. self.email = email
  108. self.password = password
  109. def __enter__(self):
  110. return self
  111. def __exit__(self, exc_type, exc_val, exc_tb):
  112. self.driver.quit()
  113. @staticmethod
  114. def get_name() -> Tuple[str, str]:
  115. """
  116. Generate a random string to be used as first or last name
  117. Returns:
  118. str: Generated string
  119. """
  120. request_url = "http://names.drycodes.com/1?nameOptions=boy_names" if random.choice([True, False]) \
  121. else "http://names.drycodes.com/1?nameOptions=girl_names"
  122. first_last_name = requests.get(request_url).text.strip('"[]').split('_')
  123. return first_last_name
  124. def register(self) -> None:
  125. """
  126. Registers new Pluralsight account
  127. """
  128. self.driver.get("https://www.pluralsight.com/offer/2020/free-april-month")
  129. time.sleep(SELENIUM_SLEEP_DURATION)
  130. accept_cookie_button_element = self.driver.find_element_by_class_name("cookie_notification--opt_in")
  131. accept_cookie_button_element.click()
  132. time.sleep(1)
  133. sign_up_now_button_element = self.driver.find_element_by_xpath('//a[@data-aa-title="Free-April-Start-Now"]')
  134. sign_up_now_button_element.click()
  135. time.sleep(1)
  136. email_input_element = self.driver.find_element_by_name("email")
  137. firstname_input_element = self.driver.find_element_by_name("firstname")
  138. lastname_input_element = self.driver.find_element_by_name("lastname")
  139. tos_checkbox_element = self.driver.find_element_by_name("optInBox")
  140. firstName, lastName = self.get_name()
  141. email_input_element.send_keys(self.email)
  142. firstname_input_element.send_keys(firstName)
  143. lastname_input_element.send_keys(lastName)
  144. tos_checkbox_element.click()
  145. time.sleep(SELENIUM_SLEEP_DURATION)
  146. create_account_button_element = self.driver.find_element_by_xpath(
  147. "//*[contains(text(), 'I agree, activate benefit')]")
  148. create_account_button_element.click()
  149. time.sleep(30)
  150. cancel_button_element = self.driver.find_element_by_class_name("cancelButton---CKAut")
  151. cancel_button_element.click()
  152. time.sleep(SELENIUM_SLEEP_DURATION)
  153. def set_password(self, verification_link: str) -> None:
  154. """
  155. Sets password in the given verification link
  156. Args:
  157. verification_link: The verification link (as string) to set up password
  158. """
  159. self.driver.get(verification_link)
  160. time.sleep(SELENIUM_SLEEP_DURATION)
  161. password_input_element = self.driver.find_element_by_id("Password")
  162. password_confirm_input_element = self.driver.find_element_by_id("PasswordConfirmation")
  163. save_button_element = self.driver.find_element_by_class_name("psds-button--appearance-primary")
  164. password_input_element.send_keys(self.password)
  165. password_confirm_input_element.send_keys(self.password)
  166. time.sleep(1)
  167. save_button_element.click()
  168. time.sleep(SELENIUM_SLEEP_DURATION)
  169. class Progress:
  170. def __init__(self, id: int, is_all: bool):
  171. self.id = id
  172. self.course_link = ""
  173. self.is_all = is_all
  174. def get_password(min_length: int = 25, max_length: int = 50) -> str:
  175. """
  176. Generates a random password using ascii letters and numerical digits
  177. Args:
  178. min_length: Minimum length of the password, default is 25
  179. max_length: Minimum length of the password, default is 50
  180. Returns: Generated password as string
  181. """
  182. length = random.randint(min_length, max_length)
  183. alphabet = string.ascii_letters + string.digits
  184. password = ''.join(secrets.choice(alphabet) for _ in range(length))
  185. return password
  186. def create_pluralsight_account(credential_file_path: str) -> Dict[str, str]:
  187. """
  188. Creates new Pluralsight account and returns the email/password as a dictionary
  189. Returns:
  190. Dict[str, str]: Dictionary containing email and password with eponymous pair keys
  191. """
  192. disposable_email = DisposableMail()
  193. password = get_password()
  194. with Pluralsight(email=disposable_email.email_address, password=password,
  195. is_headless=HIDE_SELENIUM_INSTANCES) as ps:
  196. ps.register()
  197. verification_link = disposable_email.get_verification_link()
  198. ps.set_password(verification_link=verification_link)
  199. time.sleep(SELENIUM_SLEEP_DURATION)
  200. with open(credential_file_path, 'w+') as account_file:
  201. account_file.write(f"{disposable_email.email_address}\n")
  202. account_file.write(f"{password}\n")
  203. return {'email': disposable_email, 'password': password}
  204. def download_course(course_link: str, username: str, password: str, save_directory_path: str) -> bool:
  205. """
  206. Download the given course using the provided credential
  207. Args:
  208. course_link: The link of the course to download
  209. username: Username (Email) of the Pluralsight account to be used for download
  210. password: Password of the Pluralsight account to be used for download
  211. save_directory_path: Absolute path of Root save directory
  212. Returns: True/False bool value denoting the success status of the download
  213. """
  214. try:
  215. ydl_options['username'] = username
  216. ydl_options['password'] = password
  217. if IS_WINDOWS:
  218. ydl_options[
  219. 'outtmpl'] = f"{save_directory_path}\\%(playlist)s\\%(chapter_number)s - %(chapter)s\\%(playlist_index)s - %(title)s.%(ext)s"
  220. else:
  221. ydl_options[
  222. 'outtmpl'] = f"{save_directory_path}/%(playlist)s/%(chapter_number)s - %(chapter)s/%(playlist_index)s - %(title)s.%(ext)s"
  223. with youtube_dl.YoutubeDL(ydl_options) as ydl:
  224. ydl.download([course_link])
  225. return True
  226. except:
  227. return False
  228. def get_all_pluralsight_paths(url: str) -> List[PluralsightPath]:
  229. if url.startswith("http"):
  230. json_string = requests.get(url).text
  231. else:
  232. json_string = Path(JSON_FILE_URL).read_text()
  233. all_pluralsight_paths_dicts_list = json.loads(json_string)
  234. all_pluralsight_paths_list = [PluralsightPath(**dict) for dict in all_pluralsight_paths_dicts_list]
  235. return all_pluralsight_paths_list
  236. def print_pluralsight_paths_and_courses(pluralsight_paths_list: List[PluralsightPath],
  237. search_query: str) -> List[PluralsightPath]:
  238. queried_paths_list = []
  239. for pluralsight_path in pluralsight_paths_list:
  240. if search_query.lower() not in pluralsight_path.title.lower():
  241. continue
  242. print(f"{pluralsight_path.id} | {pluralsight_path.title}")
  243. course_serial = 1
  244. print("\t0 - [DOWNLOAD ALL COURSES]")
  245. for course_link in pluralsight_path.course_links:
  246. print(f"\t{course_serial} - {course_link}")
  247. course_serial += 1
  248. queried_paths_list.append(pluralsight_path)
  249. print("0 | [DOWNLOAD ALL PATHS]")
  250. return queried_paths_list
  251. def get_directory_full_path(root_directory_path: str, pluralsight_path: PluralsightPath) -> str:
  252. directory_name = f"{pluralsight_path.id:03d} - {pluralsight_path.slug}" if IS_WINDOWS \
  253. else f"{pluralsight_path.id:03d} - {pluralsight_path.title}"
  254. directory_full_path = os.path.join(root_directory_path, directory_name)
  255. return directory_full_path
  256. def download_pluralsight_paths(pluralsight_paths=List[PluralsightPath]):
  257. pass
  258. # region Click Prompts
  259. def prompt_download_all() -> bool:
  260. is_download_all = click.prompt("Download All? ", default=False, show_default=True, type=bool, prompt_suffix="")
  261. return is_download_all
  262. def prompt_paths_and_courses_selection(all_pluralsight_paths_list: List[PluralsightPath]) -> UserSelection:
  263. while True:
  264. try:
  265. search_query = click.prompt("Search Query", type=str)
  266. queried_paths = print_pluralsight_paths_and_courses(all_pluralsight_paths_list, search_query)
  267. selected_paths = queried_paths
  268. while True:
  269. path_id = click.prompt("Select Path (by id)", type=int, default=0, show_default=True)
  270. if path_id == 0:
  271. return UserSelection(selected_paths=queried_paths)
  272. for path in queried_paths:
  273. if path_id == path.id:
  274. selected_paths = [path]
  275. break
  276. else:
  277. continue
  278. break
  279. while True:
  280. course_id = click.prompt("Select Course (by id)", type=int, default=0, show_default=True)
  281. if 0 <= course_id <= len(selected_paths[0].course_links):
  282. break
  283. if course_id == 0:
  284. return UserSelection(selected_paths=selected_paths)
  285. return UserSelection(selected_paths, selected_paths[0].course_links[course_id - 1])
  286. except Exception as e:
  287. print(e)
  288. if click.prompt("Exit", default=False, show_default=True, type=bool):
  289. return UserSelection([], is_exit=True)
  290. # endregion
  291. def get_credential() -> Tuple[str, str]:
  292. if not os.path.exists(CREDENTIAL_FILE_PATH):
  293. print("CREATING NEW PLURALSIGHT ACCOUNT")
  294. create_pluralsight_account(CREDENTIAL_FILE_PATH)
  295. print("SUCCESS! NEW PLURALSIGHT ACCOUNT CREATED.")
  296. with open(CREDENTIAL_FILE_PATH, 'r') as account_file:
  297. lines = account_file.readlines()
  298. credential = lines[0].rstrip(), lines[1].rstrip()
  299. return credential
  300. def save_progress(path_id: int, course_index: int):
  301. with open(PROGRESS_FILE_PATH, 'w+') as progress_file:
  302. progress_file.write(f"{path_id}|{course_index}")
  303. def download_all(all_pluralsight_paths: List[PluralsightPath]):
  304. global SAVE_DIRECTORY_PATH
  305. while True:
  306. try:
  307. if not os.path.isfile(PROGRESS_FILE_PATH):
  308. save_progress(path_id=1, course_index=0)
  309. current_path_id, current_course_index = map(int, Path(PROGRESS_FILE_PATH).read_text().rstrip().split("|"))
  310. while current_path_id <= len(all_pluralsight_paths):
  311. email, password = get_credential()
  312. for pluralsight_path in all_pluralsight_paths:
  313. if int(current_path_id) == pluralsight_path.id:
  314. while current_course_index < len(pluralsight_path.course_links):
  315. save_directory_path = get_directory_full_path(SAVE_DIRECTORY_PATH, pluralsight_path)
  316. course_link = pluralsight_path.course_links[current_course_index]
  317. download_result = download_course(course_link, email, password, save_directory_path)
  318. if not download_result:
  319. raise Exception("Failed to download course")
  320. current_course_index += 1
  321. save_progress(current_path_id, current_course_index)
  322. current_path_id += 1
  323. current_course_index = 0
  324. save_progress(current_path_id, current_course_index)
  325. break
  326. except(KeyboardInterrupt, SystemExit):
  327. print("EXITING PROGRAM")
  328. break
  329. except Exception as exception:
  330. os.remove(CREDENTIAL_FILE_PATH)
  331. print(exception)
  332. def main():
  333. global SAVE_DIRECTORY_PATH
  334. all_pluralsight_paths_list = get_all_pluralsight_paths(JSON_FILE_URL)
  335. if prompt_download_all():
  336. download_all(all_pluralsight_paths_list)
  337. return
  338. user_selection = prompt_paths_and_courses_selection(all_pluralsight_paths_list)
  339. if user_selection.is_exit:
  340. return
  341. while True:
  342. email_address, password = get_credential()
  343. if user_selection.selected_course_link:
  344. download_course(course_link=user_selection.selected_course_link, username=email_address, password=password)
  345. else:
  346. for path in user_selection.selected_paths:
  347. save_directory_path = get_directory_full_path(SAVE_DIRECTORY_PATH, path)
  348. for course_link in path.course_links:
  349. download_course(course_link=course_link,
  350. username=email_address,
  351. password=password,
  352. save_directory_path=save_directory_path)
  353. if __name__ == '__main__':
  354. main()