Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

pluralsight.py 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505
  import json
  import os
  import random
  import re
  import secrets
  import string
  import time
  from pathlib import Path
  from sys import platform
  from typing import Dict, List, Optional, Tuple

  import click
  import requests
  import youtube_dl
  from bs4 import BeautifulSoup
  from selenium import webdriver
  from selenium.webdriver.firefox.options import Options
  # region Global Constant(s) and Readonly Variable(s)
  # True/False to determine whether selenium instances will be visible or not (headless)
  HIDE_SELENIUM_INSTANCES = True
  # The maximum number of courses to download from a single account
  # NOTE(review): not referenced anywhere in the visible code - confirm whether it is still needed
  MAX_COURSE_DOWNLOAD_COUNT = 5
  # Duration (in seconds) passed to time.sleep() between Selenium page interactions
  SELENIUM_SLEEP_DURATION = 5
  # Minimum number of seconds to wait between consecutive video downloads
  MIN_VIDEO_DOWNLOAD_DELAY = 1
  # Maximum number of seconds to wait between consecutive video downloads
  MAX_VIDEO_DOWNLOAD_DELAY = 10
  # Check if current OS/platform is Windows
  IS_WINDOWS = platform.startswith("win")
  # Master Directory Path (the directory that contains this script file)
  MASTER_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
  # Path of the text file where pluralsight account details will be stored
  CREDENTIAL_FILE_PATH = os.path.join(MASTER_DIRECTORY, "credential.txt")
  # Path of the archive text file to log all downloaded videos
  ARCHIVE_FILE_PATH = os.path.join(MASTER_DIRECTORY, "archive.txt")
  # Progress text file path (Progress file stores progress of download_all())
  PROGRESS_FILE_PATH = os.path.join(MASTER_DIRECTORY, "progress.txt")
  # Path of the directory where downloaded courses will be saved
  SAVE_DIRECTORY_PATH = os.path.join(MASTER_DIRECTORY, "Courses")
  # JSON File (.json) containing the JSON of the paths/courses dump of Pluralsight
  # JSON_FILE_URL = os.path.join(WORKING_DIRECTORY, "pluralsight.json")
  JSON_FILE_URL = "https://git.teknik.io/CanWePlsRapeTheShitOuttaPluralsight/RapePluralsight/raw/branch/master/pluralsight.json"
  # Options for youtube-dl. For a complete list of options, check https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312
  ydl_options = {
      'writesubtitles': True,
      'nooverwrites': True,
      # Lower/upper bounds of the delay between consecutive video downloads
      'sleep_interval': MIN_VIDEO_DOWNLOAD_DELAY,
      'max_sleep_interval': MAX_VIDEO_DOWNLOAD_DELAY,
      # File in which already-downloaded videos are recorded
      'download_archive': ARCHIVE_FILE_PATH
  }
  # endregion
  class PluralsightPath:
      """
      Plain record describing one learning path.

      Attributes mirror the constructor arguments one-to-one: ``id``, ``link``,
      ``title``, ``slug`` and ``course_links`` (the links of the path's courses).
      """

      def __init__(self, id: int, link: str, title: str, slug: str, course_links: List[str]):
          # Keyword names are part of the interface: instances are built with
          # PluralsightPath(**mapping), so the parameter names must not change.
          self.id, self.link = id, link
          self.title, self.slug = title, slug
          self.course_links = course_links
  class UserSelection:
      """
      Outcome of the interactive path/course selection.

      Attributes:
          selected_paths: The paths chosen by the user
          selected_course_link: Link of a single chosen course, or "" when none was chosen
          is_exit: True when the user asked to leave instead of selecting anything
      """

      def __init__(self, selected_paths: List["PluralsightPath"], selected_course_link: str = "",
                   is_exit: bool = False):
          self.is_exit = is_exit
          self.selected_course_link = selected_course_link
          self.selected_paths = selected_paths
  class DisposableMail:
      """
      This class is used to generate random disposable emails

      The address is requested from gmailnator.com when the instance is created;
      the remaining methods poll that mailbox over HTTP.
      """

      def __init__(self):
          self.email_address = requests.post("https://gmailnator.com/index/indexquery", {'action': 'GenerateEmail'}).text

      @staticmethod
      def _extract_mail_id(content: str) -> Optional[str]:
          """
          Pull the mail item id out of a mailbox listing entry
          Args:
              content: The 'content' field of the first mailbox entry
          Returns: The text captured after '#', or None when the entry holds no id
          """
          matches = re.findall('#(.*)\\">', content)
          return matches[0] if matches else None

      def get_mail_item_id(self) -> str:
          """
          Poll the mailbox once per second until a mail item shows up
          Returns: The id of the first mail item in the mailbox
          """
          post_url = "https://gmailnator.com/mailbox/mailboxquery"
          post_data = {
              'action': 'LoadMailList',
              'Email_address': self.email_address
          }
          while True:
              time.sleep(1)
              try:
                  response_text = requests.post(post_url, post_data).json()[0]['content']
                  mail_id = self._extract_mail_id(response_text)
              except (requests.RequestException, ValueError, LookupError, TypeError):
                  # Mailbox still empty, or the response was not the expected
                  # JSON shape: keep polling. KeyboardInterrupt/SystemExit are
                  # deliberately NOT caught, so the wait can be aborted.
                  continue
              if mail_id is not None:
                  return mail_id

      def get_verification_link(self) -> str:
          """
          Fetch the first mail of the mailbox and extract the password reset link
          Returns: The href of the first anchor pointing at the password reset page
          Raises:
              LookupError: If the mail does not contain such a link
          """
          post_url = "https://gmailnator.com/mailbox/get_single_message/"
          post_data = {
              'action': 'LoadMailList',
              'message_id': self.get_mail_item_id(),
              'email': self.email_address.split("+")[0]
          }
          response_data = requests.post(post_url, post_data).text
          # Name the parser explicitly so the result does not depend on which
          # optional parser libraries happen to be installed.
          soup = BeautifulSoup(response_data, "html.parser")
          for link in soup.find_all('a', href=True):
              if "https://app.pluralsight.com/id/forgotpassword/reset?token" in link['href']:
                  return link['href']
          # Fail loudly here instead of handing None to the caller.
          raise LookupError("No password reset link found in the received mail")
  class Pluralsight:
      """
      This class handles the registration and verification of new Pluralsight accounts

      It owns a Selenium Firefox driver; use it as a context manager so the
      browser is closed when the work is done.
      """

      def __init__(self, email: str, password: str, is_headless: bool = True):
          options = Options()
          if is_headless:
              options.add_argument("--headless")
          self.driver = webdriver.Firefox(options=options)
          self.email = email
          self.password = password

      def __enter__(self):
          return self

      def __exit__(self, exc_type, exc_val, exc_tb):
          # Always close the browser, whether or not the body raised.
          self.driver.quit()

      @staticmethod
      def _split_name(raw_response: str) -> Tuple[str, str]:
          """
          Split the raw name service response into first and last name
          Args:
              raw_response: Response text such as '["First_Last"]'
          Returns:
              Tuple[str, str]: (first name, last name); the last name is "" when
              the response holds a single word, and any further '_' separators
              are turned into spaces
          """
          cleaned = raw_response.strip().strip('"[]')
          first_name, _, last_name = cleaned.partition('_')
          return first_name, last_name.replace('_', ' ')

      @staticmethod
      def get_name() -> Tuple[str, str]:
          """
          Fetch a random first/last name pair from names.drycodes.com
          Returns:
              Tuple[str, str]: (first name, last name)
          """
          request_url = "http://names.drycodes.com/1?nameOptions=boy_names" if random.choice([True, False]) \
              else "http://names.drycodes.com/1?nameOptions=girl_names"
          return Pluralsight._split_name(requests.get(request_url).text)

      def register(self) -> None:
          """
          Registers new Pluralsight account
          """
          self.driver.get("https://www.pluralsight.com/offer/2020/free-april-month")
          time.sleep(SELENIUM_SLEEP_DURATION)

          accept_cookie_button_element = self.driver.find_element_by_class_name("cookie_notification--opt_in")
          accept_cookie_button_element.click()
          time.sleep(1)

          sign_up_now_button_element = self.driver.find_element_by_xpath('//a[@data-aa-title="Free-April-Start-Now"]')
          sign_up_now_button_element.click()
          time.sleep(1)

          email_input_element = self.driver.find_element_by_name("email")
          firstname_input_element = self.driver.find_element_by_name("firstname")
          lastname_input_element = self.driver.find_element_by_name("lastname")
          tos_checkbox_element = self.driver.find_element_by_name("optInBox")

          first_name, last_name = self.get_name()
          email_input_element.send_keys(self.email)
          firstname_input_element.send_keys(first_name)
          lastname_input_element.send_keys(last_name)
          tos_checkbox_element.click()
          time.sleep(SELENIUM_SLEEP_DURATION)

          create_account_button_element = self.driver.find_element_by_xpath(
              "//*[contains(text(), 'I agree, activate benefit')]")
          create_account_button_element.click()
          time.sleep(30)

          cancel_button_element = self.driver.find_element_by_class_name("cancelButton---CKAut")
          cancel_button_element.click()
          time.sleep(SELENIUM_SLEEP_DURATION)

      def set_password(self, verification_link: str) -> None:
          """
          Sets password in the given verification link
          Args:
              verification_link: The verification link (as string) to set up password
          """
          self.driver.get(verification_link)
          time.sleep(SELENIUM_SLEEP_DURATION)

          password_input_element = self.driver.find_element_by_id("Password")
          password_confirm_input_element = self.driver.find_element_by_id("PasswordConfirmation")
          save_button_element = self.driver.find_element_by_class_name("psds-button--appearance-primary")

          password_input_element.send_keys(self.password)
          password_confirm_input_element.send_keys(self.password)
          time.sleep(1)
          save_button_element.click()
          time.sleep(SELENIUM_SLEEP_DURATION)
  class Progress:
      """
      Small record of a download position.

      Attributes:
          id: Identifier given at construction
          course_link: Always starts out as an empty string
          is_all: Flag given at construction
      """

      def __init__(self, id: int, is_all: bool):
          self.id, self.is_all = id, is_all
          self.course_link = ""
  def get_password(min_length: int = 25, max_length: int = 50) -> str:
      """
      Build a random password out of ascii letters and numerical digits
      Args:
          min_length: Minimum length of the password, default is 25
          max_length: Maximum length of the password, default is 50
      Returns: Generated password as string
      """
      # The length is picked anywhere in [min_length, max_length], both inclusive.
      target_length = random.randint(min_length, max_length)
      charset = string.ascii_letters + string.digits
      characters = [secrets.choice(charset) for _ in range(target_length)]
      return ''.join(characters)
  def create_pluralsight_account(credential_file_path: str) -> Dict[str, str]:
      """
      Creates new Pluralsight account and returns the email/password as a dictionary
      Args:
          credential_file_path: Path of the text file the credential is written to
              (email on the first line, password on the second)
      Returns:
          Dict[str, str]: Dictionary containing email and password with eponymous pair keys
      """
      disposable_email = DisposableMail()
      password = get_password()
      with Pluralsight(email=disposable_email.email_address, password=password,
                       is_headless=HIDE_SELENIUM_INSTANCES) as ps:
          ps.register()
          verification_link = disposable_email.get_verification_link()
          ps.set_password(verification_link=verification_link)
          time.sleep(SELENIUM_SLEEP_DURATION)
          with open(credential_file_path, 'w') as account_file:
              account_file.write(f"{disposable_email.email_address}\n")
              account_file.write(f"{password}\n")
      # Hand back the address string (not the DisposableMail object), as the
      # Dict[str, str] return type promises.
      return {'email': disposable_email.email_address, 'password': password}
  def download_course(course_link: str, username: str, password: str,
                      save_directory_path: Optional[str] = None) -> bool:
      """
      Download the given course using the provided credential
      Args:
          course_link: The link of the course to download
          username: Username (Email) of the Pluralsight account to be used for download
          password: Password of the Pluralsight account to be used for download
          save_directory_path: Absolute path of Root save directory; falls back to
              SAVE_DIRECTORY_PATH when omitted
      Returns: True/False bool value denoting the success status of the download
      """
      if save_directory_path is None:
          save_directory_path = SAVE_DIRECTORY_PATH
      # Work on a copy so the credential never ends up in the shared module-level options.
      options = dict(ydl_options, username=username, password=password)
      # os.path.join yields the platform's own separator, so no per-OS template is needed.
      options['outtmpl'] = os.path.join(save_directory_path,
                                        "%(playlist)s",
                                        "%(chapter_number)s - %(chapter)s",
                                        "%(playlist_index)s - %(title)s.%(ext)s")
      try:
          with youtube_dl.YoutubeDL(options) as ydl:
              ydl.download([course_link])
      except Exception as error:
          # Only real errors are turned into a False result; KeyboardInterrupt and
          # SystemExit propagate so the caller can stop the whole run.
          print(f"Failed to download {course_link}: {error}")
          return False
      return True
  def get_all_pluralsight_paths(url: str) -> List[PluralsightPath]:
      """
      Load the list of all Pluralsight paths from a JSON dump
      Args:
          url: Either an http(s) URL or a local file path of the JSON dump
      Returns: One PluralsightPath per JSON object in the dump
      """
      if url.startswith("http"):
          response = requests.get(url)
          # Surface HTTP errors directly instead of as a confusing JSON decode error.
          response.raise_for_status()
          json_string = response.text
      else:
          # Read the file that was actually asked for, not the module default.
          json_string = Path(url).read_text(encoding="utf-8")
      path_dicts = json.loads(json_string)
      return [PluralsightPath(**path_dict) for path_dict in path_dicts]
  def print_pluralsight_paths_and_courses(pluralsight_paths_list: List["PluralsightPath"],
                                          search_query: str) -> List["PluralsightPath"]:
      """
      Print every path whose title contains the search query, together with its courses
      Args:
          pluralsight_paths_list: The paths to search through
          search_query: Case-insensitive text looked up in the path titles
      Returns: The paths that matched, in their original order
      """
      needle = search_query.lower()
      matches = []
      for candidate in pluralsight_paths_list:
          if needle in candidate.title.lower():
              print(f"{candidate.id} | {candidate.title}")
              print("\t0 - [DOWNLOAD ALL COURSES]")
              # Courses are numbered from 1; 0 is reserved for "all courses".
              for serial, link in enumerate(candidate.course_links, start=1):
                  print(f"\t{serial} - {link}")
              matches.append(candidate)
      print("0 | [DOWNLOAD ALL PATHS]")
      return matches
  def get_directory_full_path(root_directory_path: str, pluralsight_path: PluralsightPath) -> str:
      """
      Build the save directory of a path below the given root directory
      Args:
          root_directory_path: Directory the path's own directory is placed in
          pluralsight_path: The path whose directory name is wanted
      Returns: root directory joined with "<zero-padded id> - <slug or title>"
      """
      # On Windows the slug is used as the label, everywhere else the title.
      if IS_WINDOWS:
          label = pluralsight_path.slug
      else:
          label = pluralsight_path.title
      return os.path.join(root_directory_path, f"{pluralsight_path.id:03d} - {label}")
  def download_pluralsight_paths(pluralsight_paths: Optional[List["PluralsightPath"]] = None):
      """
      Placeholder for downloading a list of paths; currently does nothing
      Args:
          pluralsight_paths: The paths to download. Optional, so existing calls
              without an argument keep working
      """
      # The original signature used '=' where ':' was meant, which made the
      # typing object the parameter's default value instead of its annotation.
      pass
  259. # region Click Prompts
  def prompt_download_all() -> bool:
      """
      Ask the user whether everything should be downloaded
      Returns: The user's yes/no answer; the offered default is False
      """
      return click.prompt("Download All? ", default=False, show_default=True, type=bool, prompt_suffix="")
  def prompt_paths_and_courses_selection(all_pluralsight_paths_list: List[PluralsightPath]) -> UserSelection:
      """
      Interactively ask for a search query, then a path and finally a course
      Args:
          all_pluralsight_paths_list: All known paths the user can search in
      Returns:
          UserSelection: every queried path when path id 0 is chosen, the single
          chosen path when course id 0 is chosen, the chosen path plus one course
          link otherwise, or an empty selection with is_exit=True when the user
          confirms "Exit" after an error
      """
      # NOTE(review): the nesting below was reconstructed from a copy of the file
      # that had lost its indentation - confirm against the original.
      while True:
          try:
              search_query = click.prompt("Search Query", type=str)
              queried_paths = print_pluralsight_paths_and_courses(all_pluralsight_paths_list, search_query)
              selected_paths = queried_paths
              # Keep asking until the id is 0 (all queried paths) or matches a queried path.
              while True:
                  path_id = click.prompt("Select Path (by id)", type=int, default=0, show_default=True)
                  if path_id == 0:
                      return UserSelection(selected_paths=queried_paths)
                  for path in queried_paths:
                      if path_id == path.id:
                          selected_paths = [path]
                          break
                  else:
                      # No queried path has this id: ask again.
                      continue
                  break
              # Keep asking until the course id is 0 or a valid 1-based course number.
              while True:
                  course_id = click.prompt("Select Course (by id)", type=int, default=0, show_default=True)
                  if 0 <= course_id <= len(selected_paths[0].course_links):
                      break
              if course_id == 0:
                  return UserSelection(selected_paths=selected_paths)
              # Course numbers shown to the user start at 1, list indices at 0.
              return UserSelection(selected_paths, selected_paths[0].course_links[course_id - 1])
          except Exception as e:
              print(e)
              # After an error the user may leave; otherwise the whole dialogue restarts.
              if click.prompt("Exit", default=False, show_default=True, type=bool):
                  return UserSelection([], is_exit=True)
  291. # endregion
  def get_credential() -> Tuple[str, str]:
      """
      Read the stored account credential, creating a new account first when none is stored
      Returns:
          Tuple[str, str]: The first and second line of the credential file
          (email, password) without trailing whitespace
      """
      credential_is_missing = not os.path.exists(CREDENTIAL_FILE_PATH)
      if credential_is_missing:
          print("CREATING NEW PLURALSIGHT ACCOUNT")
          create_pluralsight_account(CREDENTIAL_FILE_PATH)
          print("SUCCESS! NEW PLURALSIGHT ACCOUNT CREATED.")
      with open(CREDENTIAL_FILE_PATH, 'r') as account_file:
          stored_lines = account_file.readlines()
      email_line, password_line = stored_lines[0], stored_lines[1]
      return email_line.rstrip(), password_line.rstrip()
  def save_progress(path_id: int, course_index: int):
      """
      Overwrite the progress file with the given position
      Args:
          path_id: Id of the path currently being downloaded
          course_index: Index of the next course to download within that path
      """
      # Stored as a single "<path_id>|<course_index>" record.
      record = f"{path_id}|{course_index}"
      with open(PROGRESS_FILE_PATH, 'w+') as progress_file:
          progress_file.write(record)
  def download_all(all_pluralsight_paths: List[PluralsightPath]):
      """
      Download every course of every path, resuming from the progress file
      The progress file holds "<path_id>|<course_index>" and is rewritten after
      each finished course and each finished path. When anything fails, the
      stored credential is discarded and the run restarts from the saved
      progress. KeyboardInterrupt/SystemExit end the run.
      Args:
          all_pluralsight_paths: All paths to download; path ids are expected to
              run from 1 up to the number of paths
      """
      while True:
          try:
              if not os.path.isfile(PROGRESS_FILE_PATH):
                  save_progress(path_id=1, course_index=0)
              progress_text = Path(PROGRESS_FILE_PATH).read_text().rstrip()
              current_path_id, current_course_index = map(int, progress_text.split("|"))

              while current_path_id <= len(all_pluralsight_paths):
                  email, password = get_credential()
                  # First path carrying the current id, or None when there is none.
                  pluralsight_path = next(
                      (path for path in all_pluralsight_paths if path.id == current_path_id), None)
                  if pluralsight_path is not None:
                      save_directory_path = get_directory_full_path(SAVE_DIRECTORY_PATH, pluralsight_path)
                      while current_course_index < len(pluralsight_path.course_links):
                          course_link = pluralsight_path.course_links[current_course_index]
                          if not download_course(course_link, email, password, save_directory_path):
                              raise Exception("Failed to download course")
                          current_course_index += 1
                          save_progress(current_path_id, current_course_index)
                  # Path finished (or id unknown): move on to the next one.
                  current_path_id += 1
                  current_course_index = 0
                  save_progress(current_path_id, current_course_index)
              # Every path has been handled.
              break
          except (KeyboardInterrupt, SystemExit):
              print("EXITING PROGRAM")
              break
          except Exception as exception:
              print(exception)
              # Drop the credential so the next round starts with a fresh one. The
              # file may not exist yet if the failure happened before it was written.
              if os.path.exists(CREDENTIAL_FILE_PATH):
                  os.remove(CREDENTIAL_FILE_PATH)
  def main():
      """
      Entry point: load the path list, ask what to download and download it
      The user either downloads everything (resumable, see download_all), or
      searches and picks paths or a single course, which are then downloaded once.
      """
      all_pluralsight_paths_list = get_all_pluralsight_paths(JSON_FILE_URL)
      if prompt_download_all():
          download_all(all_pluralsight_paths_list)
          return

      user_selection = prompt_paths_and_courses_selection(all_pluralsight_paths_list)
      if user_selection.is_exit:
          return

      email_address, password = get_credential()
      if user_selection.selected_course_link:
          # A single course always comes with the one path it belongs to.
          save_directory_path = get_directory_full_path(SAVE_DIRECTORY_PATH, user_selection.selected_paths[0])
          download_course(course_link=user_selection.selected_course_link,
                          username=email_address,
                          password=password,
                          save_directory_path=save_directory_path)
          return

      for path in user_selection.selected_paths:
          save_directory_path = get_directory_full_path(SAVE_DIRECTORY_PATH, path)
          for course_link in path.course_links:
              download_course(course_link=course_link,
                              username=email_address,
                              password=password,
                              save_directory_path=save_directory_path)


  if __name__ == '__main__':
      main()