You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

pluralsight.py 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  import json
  import os
  import random
  import re
  import secrets
  import string
  import time
  from pathlib import Path
  from sys import platform
  from typing import Dict, List, Optional, Tuple

  import click
  import requests
  import youtube_dl
  from bs4 import BeautifulSoup
  from selenium import webdriver
  from selenium.webdriver.firefox.options import Options
  # region Global Constant(s) and Readonly Variable(s)
  # Passed as `is_headless` when the Selenium driver is created:
  # True runs the browser headless (hidden), False shows the browser window
  HIDE_SELENIUM_INSTANCES = False
  # The maximum number of courses to download from a single account
  # NOTE(review): not referenced anywhere else in this file
  MAX_COURSE_DOWNLOAD_COUNT = 5
  # Default pause, in seconds, used for the time.sleep() calls between browser steps
  SLEEP_DURATION = 5
  # True when sys.platform starts with "win" (i.e. the script runs on Windows)
  IS_WINDOWS = platform.startswith("win")
  # Master Directory Path: the directory that contains this script
  WORKING_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
  # Path of the text file where pluralsight account details will be stored
  # (first line: email address, second line: password)
  CREDENTIAL_FILE_PATH = os.path.join(WORKING_DIRECTORY, "credential.txt")
  # Path of the directory where downloaded courses will be saved
  SAVE_DIRECTORY_PATH = os.path.join(WORKING_DIRECTORY, "Courses")
  # JSON File (.json) containing the JSON of the paths/courses dump of Pluralsight
  # JSON_FILE_URL = os.path.join(WORKING_DIRECTORY, "pluralsight.json")
  JSON_FILE_URL = "https://git.teknik.io/CanWePlsRapeTheShitOuttaPluralsight/RapePluralsight/raw/branch/master/pluralsight.json"
  # Options for youtube-dl. For a complete list of options, check https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312
  ydl_options = {
      'writesubtitles': True,
      'nooverwrites': True,
      'sleep_interval': 10,
      'max_sleep_interval': 20,
  }
  # endregion
  class PluralsightPath:
      """
      Plain record describing one Pluralsight learning path.

      The constructor keyword names mirror the keys of the path dictionaries
      loaded from the JSON dump, so an instance can be built with `PluralsightPath(**entry)`.
      """

      def __init__(self, id: int, link: str, title: str, slug: str, course_links: List[str]):
          # Identification of the path
          self.id, self.link = id, link
          # Human readable title and its slug form
          self.title, self.slug = title, slug
          # Links of the courses that belong to this path
          self.course_links = course_links
  class UserSelection:
      """
      Result of the interactive path/course prompt.

      Holds the chosen paths, an optional single course link (empty string when
      no single course was chosen) and a flag telling whether the user asked to exit.
      """

      def __init__(self, selected_paths: List[PluralsightPath], selected_course_link: str = "", is_exit: bool = False):
          self.is_exit = is_exit
          self.selected_course_link = selected_course_link
          self.selected_paths = selected_paths
  class DisposableMail:
      """
      This class is used to generate random disposable emails (through gmailnator.com)
      and to read the verification mail that arrives in the generated inbox
      """

      def __init__(self):
          # The response body of the "GenerateEmail" action is used as the email address as-is
          self.email_address = requests.post("https://gmailnator.com/index/indexquery", {'action': 'GenerateEmail'}).text

      def get_mail_item_id(self) -> str:
          """
          Polls the mailbox once per second until a mail item id can be extracted
          Returns:
              str: The text captured between '#' and '">' in the content of the first mail list entry
          """
          post_url = "https://gmailnator.com/mailbox/mailboxquery"
          post_data = {
              'action': 'LoadMailList',
              'Email_address': self.email_address
          }
          while True:
              time.sleep(1)
              try:
                  response_text = requests.post(post_url, post_data).json()[0]['content']
                  result = re.findall(r'#(.*)\">', response_text)
                  return result[0]
              except (requests.RequestException, ValueError, LookupError, TypeError):
                  # Request failed, inbox still empty or unexpected payload: keep polling.
                  # Only these errors are swallowed so that Ctrl-C (KeyboardInterrupt) still stops the loop.
                  continue

      def get_verification_link(self) -> Optional[str]:
          """
          Fetches the received mail and looks for the Pluralsight password reset link in it
          Returns:
              Optional[str]: The first matching link, or None when the mail contains no such link
          """
          post_url = "https://gmailnator.com/mailbox/get_single_message/"
          post_data = {
              'action': 'LoadMailList',
              'message_id': self.get_mail_item_id(),
              # Only the part of the address before the first '+' is sent
              'email': self.email_address.split("+")[0]
          }
          response_data = requests.post(post_url, post_data).text
          # Name the parser explicitly so the result does not depend on which parsers are installed
          soup = BeautifulSoup(response_data, "html.parser")
          for link in soup.find_all('a', href=True):
              if "https://app.pluralsight.com/id/forgotpassword/reset?token" in link['href']:
                  return link['href']
          return None
  class Pluralsight:
      """
      This class handles the registration and verification of new Pluralsight accounts

      It owns a Selenium Firefox driver and is meant to be used as a context manager,
      the driver is closed when the `with` block is left
      """

      def __init__(self, email: str, password: str, is_headless: bool = True):
          """
          Args:
              email: Email address to register the account with
              password: Password to set for the account
              is_headless: True to start Firefox with the "--headless" argument
          """
          if is_headless:
              options = Options()
              options.add_argument("--headless")
              self.driver = webdriver.Firefox(options=options)
          else:
              self.driver = webdriver.Firefox()
          self.email = email
          self.password = password

      def __enter__(self):
          return self

      def __exit__(self, exc_type, exc_val, exc_tb):
          # Always close the browser, whether or not the block raised
          self.driver.quit()

      @staticmethod
      def _parse_name(raw: str) -> Tuple[str, str]:
          """
          Splits the raw text of the name service response into first and last name
          Args:
              raw: Response text, the surrounding '"', '[' and ']' characters are stripped
          Returns:
              Tuple[str, str]: Text before the first '_' and text after it (empty string if there is no '_')
          """
          first_name, _, last_name = raw.strip().strip('"[]').partition('_')
          return first_name, last_name

      @staticmethod
      def get_name() -> Tuple[str, str]:
          """
          Requests a random boy or girl name from names.drycodes.com
          Returns:
              Tuple[str, str]: Generated first name and last name, always exactly two items
          """
          request_url = "http://names.drycodes.com/1?nameOptions=boy_names" if random.choice([True, False]) \
              else "http://names.drycodes.com/1?nameOptions=girl_names"
          return Pluralsight._parse_name(requests.get(request_url).text)

      def register(self) -> None:
          """
          Registers new Pluralsight account

          Walks through the offer page in the browser: accepts the cookie notification,
          opens the sign up form, fills email/first name/last name, ticks the opt-in box,
          submits the form and finally clicks the cancel button of the page shown afterwards
          """
          self.driver.get("https://www.pluralsight.com/offer/2020/free-april-month")
          time.sleep(SLEEP_DURATION)

          accept_cookie_button_element = self.driver.find_element_by_class_name("cookie_notification--opt_in")
          accept_cookie_button_element.click()
          time.sleep(1)

          sign_up_now_button_element = self.driver.find_element_by_xpath('//a[@data-aa-title="Free-April-Start-Now"]')
          sign_up_now_button_element.click()
          time.sleep(1)

          email_input_element = self.driver.find_element_by_name("email")
          firstname_input_element = self.driver.find_element_by_name("firstname")
          lastname_input_element = self.driver.find_element_by_name("lastname")
          tos_checkbox_element = self.driver.find_element_by_name("optInBox")

          first_name, last_name = self.get_name()
          email_input_element.send_keys(self.email)
          firstname_input_element.send_keys(first_name)
          lastname_input_element.send_keys(last_name)
          tos_checkbox_element.click()
          time.sleep(SLEEP_DURATION)

          create_account_button_element = self.driver.find_element_by_xpath(
              "//*[contains(text(), 'I agree, activate benefit')]")
          create_account_button_element.click()
          # NOTE(review): fixed 30 second wait, presumably for the account creation to finish - confirm
          time.sleep(30)

          cancel_button_element = self.driver.find_element_by_class_name("cancelButton---CKAut")
          cancel_button_element.click()
          time.sleep(SLEEP_DURATION)

      def set_password(self, verification_link: str) -> None:
          """
          Sets password in the given verification link
          Args:
              verification_link: The verification link (as string) to set up password
          """
          self.driver.get(verification_link)
          time.sleep(SLEEP_DURATION)

          password_input_element = self.driver.find_element_by_id("Password")
          password_confirm_input_element = self.driver.find_element_by_id("PasswordConfirmation")
          save_button_element = self.driver.find_element_by_class_name("psds-button--appearance-primary")

          password_input_element.send_keys(self.password)
          password_confirm_input_element.send_keys(self.password)
          time.sleep(1)
          save_button_element.click()
          time.sleep(SLEEP_DURATION)
  class Progress:
      """
      Small state holder: an id, a flag telling whether everything was selected,
      and a course link that always starts out as an empty string
      """

      def __init__(self, id: int, is_all: bool):
          self.course_link = ""
          self.id, self.is_all = id, is_all
  def get_password(min_length: int = 25, max_length: int = 50) -> str:
      """
      Generates a random password using ascii letters and numerical digits
      Args:
          min_length: Minimum length of the password, default is 25
          max_length: Maximum length of the password (inclusive), default is 50
      Returns: Generated password as string
      Raises:
          ValueError: If max_length is smaller than min_length
      """
      # Draw the length from the same cryptographic source as the characters.
      # randbelow(n) yields 0..n-1, so adding 1 keeps max_length inclusive.
      length = min_length + secrets.randbelow(max_length - min_length + 1)
      alphabet = string.ascii_letters + string.digits
      return ''.join(secrets.choice(alphabet) for _ in range(length))
  def create_pluralsight_account(credential_file_path: str) -> Dict[str, str]:
      """
      Creates new Pluralsight account, stores its credential and returns the email/password as a dictionary
      Args:
          credential_file_path: Path of the file to write the email (first line) and password (second line) to
      Returns:
          Dict[str, str]: Dictionary containing email and password with eponymous pair keys
      Raises:
          RuntimeError: If the received mail does not contain a password reset link
      """
      disposable_email = DisposableMail()
      password = get_password()

      with Pluralsight(email=disposable_email.email_address, password=password,
                       is_headless=HIDE_SELENIUM_INSTANCES) as ps:
          ps.register()
          verification_link = disposable_email.get_verification_link()
          if not verification_link:
              # Fail with a clear message instead of handing None to the browser
              raise RuntimeError("No Pluralsight verification link found in the received mail")
          ps.set_password(verification_link=verification_link)
          time.sleep(SLEEP_DURATION)

      with open(credential_file_path, 'w') as account_file:
          account_file.write(f"{disposable_email.email_address}\n")
          account_file.write(f"{password}\n")

      # Return the address string, not the DisposableMail object itself
      return {'email': disposable_email.email_address, 'password': password}
  def download_course(course_link: str, username: str, password: str) -> bool:
      """
      Download the given course using the provided credential
      Args:
          course_link: The link of the course to download
          username: Username (Email) of the Pluralsight account to be used for download
          password: Password of the Pluralsight account to be used for download
      Returns: True/False bool value denoting the success status of the download
      """
      # Work on a copy so the credentials do not leak into the shared module-level options
      options = dict(ydl_options)
      options['username'] = username
      options['password'] = password
      # os.path.join picks the separator of the current platform
      options['outtmpl'] = os.path.join(
          SAVE_DIRECTORY_PATH,
          "%(playlist)s",
          "%(chapter_number)s - %(chapter)s",
          "%(playlist_index)s - %(title)s.%(ext)s",
      )

      try:
          with youtube_dl.YoutubeDL(options) as ydl:
              ydl.download([course_link])
      except Exception as error:
          # `Exception` (not a bare except) so that Ctrl-C / SystemExit are not swallowed
          print(f"Failed to download {course_link}: {error}")
          return False
      return True
  def get_all_pluralsight_paths(url: str) -> List[PluralsightPath]:
      """
      Loads the JSON dump of Pluralsight paths and converts every entry into a PluralsightPath
      Args:
          url: Either a link starting with "http" (fetched over the network) or the path of a local JSON file
      Returns:
          List[PluralsightPath]: One PluralsightPath per dictionary of the JSON list
      """
      if url.startswith("http"):
          response = requests.get(url)
          # Surface HTTP errors directly instead of failing later while parsing an error page as JSON
          response.raise_for_status()
          json_string = response.text
      else:
          # Read the file that was actually requested, not the global default location
          json_string = Path(url).read_text()

      return [PluralsightPath(**path_dict) for path_dict in json.loads(json_string)]
  def print_pluralsight_paths_and_courses(pluralsight_paths_list: List[PluralsightPath],
                                          search_query: str) -> List[PluralsightPath]:
      """
      Prints every path whose title contains the search query (case-insensitive) together
      with its numbered course links, followed by the "download all paths" entry
      Args:
          pluralsight_paths_list: The paths to search in
          search_query: Text to look for in the path titles
      Returns:
          List[PluralsightPath]: The paths that matched the query, in their original order
      """
      needle = search_query.lower()
      matches = []

      for candidate in pluralsight_paths_list:
          if needle in candidate.title.lower():
              print(f"{candidate.id} | {candidate.title}")
              print("\t0 - [DOWNLOAD ALL COURSES]")
              # Courses are numbered from 1, 0 is reserved for "all courses"
              for serial, link in enumerate(candidate.course_links, start=1):
                  print(f"\t{serial} - {link}")
              matches.append(candidate)

      print("0 | [DOWNLOAD ALL PATHS]")
      return matches
  def get_directory_full_path(root_directory_path: str, pluralsight_path: PluralsightPath) -> str:
      """
      Builds the directory path of a Pluralsight path below the given root directory
      Args:
          root_directory_path: Directory the new directory name is joined onto
          pluralsight_path: The path whose id and slug/title form the directory name
      Returns:
          str: root directory joined with "<id padded to 3 digits> - <slug on Windows, title otherwise>"
      """
      if IS_WINDOWS:
          label = pluralsight_path.slug
      else:
          label = pluralsight_path.title
      return os.path.join(root_directory_path, f"{pluralsight_path.id:03d} - {label}")
  def download_all(all_pluralsight_courses: List[PluralsightPath]):
      """
      Placeholder for downloading everything in the given list; not implemented yet,
      the function currently does nothing and returns None
      Args:
          all_pluralsight_courses: The list of paths that is meant to be downloaded
      """
      # Todo Download all
      pass
  def download_pluralsight_paths(pluralsight_paths: Optional[List[PluralsightPath]] = None):
      """
      Placeholder for downloading the given paths; not implemented yet,
      the function currently does nothing and returns None
      Args:
          pluralsight_paths: The paths that are meant to be downloaded. The parameter stays
              optional so that existing calls without an argument keep working
      """
      pass
  252. # region Click Prompts
  def prompt_download_all() -> bool:
      """
      Asks the user on the command line whether everything should be downloaded
      Returns:
          bool: The answer given by the user, False when the default is accepted
      """
      return click.prompt("Download All? ", default=False, show_default=True, type=bool, prompt_suffix="")
  def prompt_paths_and_courses_selection(all_pluralsight_paths_list: List[PluralsightPath]) -> UserSelection:
      """
      Interactively asks for a search query, then a path id, then a course number
      Args:
          all_pluralsight_paths_list: All known paths, the search runs over their titles
      Returns:
          UserSelection: All queried paths (path id 0), one whole path (course number 0),
          one path with a single course link, or an empty selection with is_exit=True
      """
      # Outer loop: start over with a new search query whenever an error occurred
      # and the user did not choose to exit
      while True:
          try:
              search_query = click.prompt("Search Query", type=str)
              queried_paths = print_pluralsight_paths_and_courses(all_pluralsight_paths_list, search_query)
              selected_paths = queried_paths

              # Ask for a path id until it is 0 or the id of one of the queried paths
              while True:
                  path_id = click.prompt("Select Path (by id)", type=int, default=0, show_default=True)
                  if path_id == 0:
                      # 0 means "download all paths" of the current query
                      return UserSelection(selected_paths=queried_paths)
                  for path in queried_paths:
                      if path_id == path.id:
                          selected_paths = [path]
                          break
                  else:
                      # No queried path has this id: ask again
                      continue
                  # A path was found (inner for loop left via break): stop asking
                  break

              # Ask for a course number until it is within 0..number of courses of the selected path
              while True:
                  course_id = click.prompt("Select Course (by id)", type=int, default=0, show_default=True)
                  if 0 <= course_id <= len(selected_paths[0].course_links):
                      break

              if course_id == 0:
                  # 0 means "download all courses" of the selected path
                  return UserSelection(selected_paths=selected_paths)
              # Course numbers shown to the user start at 1, list indices at 0
              return UserSelection(selected_paths, selected_paths[0].course_links[course_id - 1])
          except Exception as e:
              print(e)
              if click.prompt("Exit", default=False, show_default=True, type=bool):
                  return UserSelection([], is_exit=True)
  284. # endregion
  def get_credential() -> Tuple[str,str]:
      """
      Reads the stored Pluralsight credential, creating a new account first
      when the credential file does not exist yet
      Returns:
          Tuple[str, str]: First and second line of the credential file (email, password),
          each with trailing whitespace removed
      """
      credential_missing = not os.path.exists(CREDENTIAL_FILE_PATH)
      if credential_missing:
          print("CREATING NEW PLURALSIGHT ACCOUNT")
          create_pluralsight_account(CREDENTIAL_FILE_PATH)
          print("SUCCESS! NEW PLURALSIGHT ACCOUNT CREATED.")

      with open(CREDENTIAL_FILE_PATH, 'r') as credential_file:
          stored_lines = credential_file.readlines()

      email_line, password_line = stored_lines[0], stored_lines[1]
      return email_line.rstrip(), password_line.rstrip()
  def main():
      """
      Entry point of the script: loads the path list, asks the user what to download
      and downloads either everything, a single course or all courses of the selected paths
      """
      global SAVE_DIRECTORY_PATH

      all_pluralsight_paths_list = get_all_pluralsight_paths(JSON_FILE_URL)

      if prompt_download_all():
          download_all(all_pluralsight_paths_list)
          return

      user_selection = prompt_paths_and_courses_selection(all_pluralsight_paths_list)
      if user_selection.is_exit:
          return

      # One pass over the selection; the previous endless loop repeated the same downloads forever
      email_address, password = get_credential()

      if user_selection.selected_course_link:
          download_course(course_link=user_selection.selected_course_link, username=email_address, password=password)
          return

      # download_course reads the global SAVE_DIRECTORY_PATH, so it is pointed at the directory
      # of the current path. Always derive it from the unchanged base directory, otherwise
      # every path would end up nested inside the directory of the previous one.
      base_save_directory = SAVE_DIRECTORY_PATH
      try:
          for path in user_selection.selected_paths:
              SAVE_DIRECTORY_PATH = get_directory_full_path(base_save_directory, path)
              for course_link in path.course_links:
                  download_course(course_link=course_link, username=email_address, password=password)
      finally:
          SAVE_DIRECTORY_PATH = base_save_directory


  if __name__ == '__main__':
      main()