You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

parse.go 4.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. package classic
  2. import (
  3. "github.com/PuerkitoBio/goquery"
  4. "errors"
  5. "strconv"
  6. "time"
  7. "github.com/terorie/yt-mango/data"
  8. "regexp"
  9. "github.com/valyala/fastjson"
  10. "strings"
  11. )
  12. const likeBtnSelector = ".like-button-renderer-like-button-unclicked"
  13. const dislikeBtnSelector = ".like-button-renderer-dislike-button-unclicked"
  14. const viewCountSelector = "div .watch-view-count"
  15. const userInfoSelector = "div .yt-user-info"
  16. const channelNameSelector = ".yt-uix-sessionlink"
  17. var playerConfigErr = errors.New("failed to parse player config")
  18. type parseInfo struct {
  19. v *data.Video
  20. doc *goquery.Document
  21. }
  22. func (p *parseInfo) parse() error {
  23. if err := p.parseLikeDislike();
  24. err != nil { return err }
  25. if err := p.parseViewCount();
  26. err != nil { return err }
  27. if err := p.parseUploader();
  28. err != nil { return err }
  29. if err := p.parseDescription();
  30. err != nil { return err }
  31. if err := p.parsePlayerConfig();
  32. err != nil { return err }
  33. if err := p.parseMetas();
  34. err != nil { return err }
  35. return nil
  36. }
  37. func (p *parseInfo) parseLikeDislike() error {
  38. likeText := p.doc.Find(likeBtnSelector).First().Text()
  39. dislikeText := p.doc.Find(dislikeBtnSelector).First().Text()
  40. if len(likeText) == 0 || len(dislikeText) == 0 {
  41. return errors.New("failed to parse like buttons")
  42. }
  43. var err error
  44. p.v.Likes, err = extractNumber(likeText)
  45. if err != nil { return err }
  46. p.v.Dislikes, err = extractNumber(dislikeText)
  47. if err != nil { return err }
  48. return nil
  49. }
  50. func (p *parseInfo) parseViewCount() error {
  51. viewCountText := p.doc.Find(viewCountSelector).First().Text()
  52. viewCount, err := extractNumber(viewCountText)
  53. if err != nil { return err }
  54. p.v.Views = viewCount
  55. return nil
  56. }
  57. func (p *parseInfo) parseUploader() error {
  58. userInfo := p.doc.Find(userInfoSelector)
  59. userLinkNode := userInfo.Find(".yt-uix-sessionlink")
  60. // get link
  61. userLink, _ := userLinkNode.Attr("href")
  62. if userLink == "" { return errors.New("couldn't find channel link") }
  63. p.v.UploaderURL = "https://www.youtube.com" + userLink
  64. // get name
  65. channelName := userInfo.Find(channelNameSelector).Text()
  66. if channelName == "" { return errors.New("could not find channel name") }
  67. p.v.Uploader = channelName
  68. return nil
  69. }
  70. func (p *parseInfo) parseMetas() error {
  71. metas := p.doc.Find("meta")
  72. // For each <meta>
  73. for _, node := range metas.Nodes {
  74. // Attributes
  75. var content string
  76. var itemprop string
  77. var prop string
  78. // Parse attributes
  79. for _, attr := range node.Attr {
  80. switch attr.Key {
  81. case "property": prop = attr.Val
  82. case "itemprop": itemprop = attr.Val
  83. case "content": content = attr.Val
  84. }
  85. }
  86. // Content not set
  87. if len(content) == 0 { continue }
  88. // <meta property …
  89. if len(prop) != 0 {
  90. switch prop {
  91. case "og:title":
  92. p.v.Title = content
  93. case "og:video:tag":
  94. p.v.Tags = append(p.v.Tags, content)
  95. case "og:url":
  96. p.v.URL = content
  97. case "og:image":
  98. p.v.Thumbnail = content
  99. }
  100. continue
  101. }
  102. // <meta itemprop …
  103. if len(itemprop) != 0 {
  104. switch itemprop {
  105. case "datePublished":
  106. if val, err := time.Parse("2006-01-02", content);
  107. err == nil { p.v.UploadDate = val }
  108. case "genre":
  109. p.v.Genre = content
  110. case "channelId":
  111. p.v.UploaderID = content
  112. case "duration":
  113. if val, err := parseDuration(content); err == nil {
  114. p.v.Duration = val
  115. } else {
  116. return err
  117. }
  118. case "isFamilyFriendly":
  119. if val, err := strconv.ParseBool(content);
  120. err == nil { p.v.FamilyFriendly = val }
  121. }
  122. continue
  123. }
  124. }
  125. return nil
  126. }
  127. func (p *parseInfo) parsePlayerConfig() error {
  128. var json string
  129. p.doc.Find("script").EachWithBreak(func(_ int, s *goquery.Selection) bool {
  130. script := s.Text()
  131. startMatch := regexp.MustCompile("var ytplayer = ytplayer \\|\\| {};\\s*ytplayer\\.config = {")
  132. endMatch := regexp.MustCompile("};\\s*ytplayer.load = function\\(")
  133. startIndices := startMatch.FindStringIndex(script)
  134. if startIndices == nil { return true }
  135. endIndices := endMatch.FindStringIndex(script)
  136. if endIndices == nil { return true }
  137. // minus one to preserve braces
  138. startIndex, endIndex := startIndices[1] - 1, endIndices[0] + 1
  139. if startIndex > endIndex { return true }
  140. json = script[startIndex:endIndex]
  141. // Stop searching, json found
  142. return false
  143. })
  144. // No json found
  145. if json == "" { return playerConfigErr }
  146. // Try decoding json
  147. var parser fastjson.Parser
  148. config, err := parser.Parse(json)
  149. if err != nil { return err }
  150. // Extract data
  151. args := config.Get("args")
  152. if args == nil { return playerConfigErr }
  153. // Get fmt_list string
  154. fmtList := args.GetStringBytes("fmt_list")
  155. if fmtList == nil { return playerConfigErr }
  156. // Split and decode it
  157. fmts := strings.Split(string(fmtList), ",")
  158. for _, fmt := range fmts {
  159. parts := strings.Split(fmt, "/")
  160. if len(parts) != 2 { return playerConfigErr }
  161. formatID := parts[0]
  162. // Look up the format ID
  163. format := data.FormatsById[formatID]
  164. if format == nil { return playerConfigErr }
  165. p.v.Formats = append(p.v.Formats, *format)
  166. }
  167. return nil
  168. }