You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

parsevideo.go 4.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. package apiclassic
  2. import (
  3. "github.com/PuerkitoBio/goquery"
  4. "errors"
  5. "strconv"
  6. "time"
  7. "github.com/terorie/yt-mango/data"
  8. "regexp"
  9. "github.com/valyala/fastjson"
  10. "strings"
  11. "net/http"
  12. )
  13. const likeBtnSelector = ".like-button-renderer-like-button-unclicked"
  14. const dislikeBtnSelector = ".like-button-renderer-dislike-button-unclicked"
  15. const viewCountSelector = "div .watch-view-count"
  16. const userInfoSelector = "div .yt-user-info"
  17. const channelNameSelector = ".yt-uix-sessionlink"
  18. var playerConfigErr = errors.New("failed to parse player config")
  19. func ParseVideo(v *data.Video, res *http.Response) (err error) {
  20. if res.StatusCode != 200 { return errors.New("HTTP failure") }
  21. defer res.Body.Close()
  22. doc, err := goquery.NewDocumentFromReader(res.Body)
  23. if err != nil { return }
  24. p := parseVideoInfo{v, doc}
  25. return p.parse()
  26. }
  27. type parseVideoInfo struct {
  28. v *data.Video
  29. doc *goquery.Document
  30. }
  31. func (p *parseVideoInfo) parse() error {
  32. if err := p.parseLikeDislike();
  33. err != nil { return err }
  34. if err := p.parseViewCount();
  35. err != nil { return err }
  36. if err := p.parseUploader();
  37. err != nil { return err }
  38. if err := p.parseDescription();
  39. err != nil { return err }
  40. if err := p.parsePlayerConfig();
  41. err != nil { return err }
  42. if err := p.parseMetas();
  43. err != nil { return err }
  44. return nil
  45. }
  46. func (p *parseVideoInfo) parseLikeDislike() error {
  47. likeText := p.doc.Find(likeBtnSelector).First().Text()
  48. dislikeText := p.doc.Find(dislikeBtnSelector).First().Text()
  49. if len(likeText) == 0 || len(dislikeText) == 0 {
  50. return errors.New("failed to parse like buttons")
  51. }
  52. var err error
  53. p.v.Likes, err = extractNumber(likeText)
  54. if err != nil { return err }
  55. p.v.Dislikes, err = extractNumber(dislikeText)
  56. if err != nil { return err }
  57. return nil
  58. }
  59. func (p *parseVideoInfo) parseViewCount() error {
  60. viewCountText := p.doc.Find(viewCountSelector).First().Text()
  61. viewCount, err := extractNumber(viewCountText)
  62. if err != nil { return err }
  63. p.v.Views = viewCount
  64. return nil
  65. }
  66. func (p *parseVideoInfo) parseUploader() error {
  67. userInfo := p.doc.Find(userInfoSelector)
  68. userLinkNode := userInfo.Find(".yt-uix-sessionlink")
  69. // get link
  70. userLink, _ := userLinkNode.Attr("href")
  71. if userLink == "" { return errors.New("couldn't find channel link") }
  72. p.v.UploaderURL = "https://www.youtube.com" + userLink
  73. // get name
  74. channelName := userInfo.Find(channelNameSelector).Text()
  75. if channelName == "" { return errors.New("could not find channel name") }
  76. p.v.Uploader = channelName
  77. return nil
  78. }
  79. func (p *parseVideoInfo) parseMetas() (err error) {
  80. enumMetas(p.doc.Selection, func(tag metaTag)bool {
  81. content := tag.content
  82. switch tag.typ {
  83. case metaProperty:
  84. switch tag.name {
  85. case "og:title":
  86. p.v.Title = content
  87. case "og:video:tag":
  88. p.v.Tags = append(p.v.Tags, content)
  89. case "og:url":
  90. p.v.URL = content
  91. case "og:image":
  92. p.v.Thumbnail = content
  93. }
  94. case metaItemProp:
  95. switch tag.name {
  96. case "datePublished":
  97. if val, err := time.Parse("2006-01-02", content);
  98. err == nil { p.v.UploadDate = val }
  99. case "genre":
  100. p.v.Genre = content
  101. case "channelId":
  102. p.v.UploaderID = content
  103. case "duration":
  104. if val, err := parseDuration(content); err == nil {
  105. p.v.Duration = val
  106. } else {
  107. return false
  108. }
  109. case "isFamilyFriendly":
  110. if val, err := strconv.ParseBool(content);
  111. err == nil { p.v.FamilyFriendly = val }
  112. }
  113. }
  114. return true
  115. })
  116. return err
  117. }
  118. func (p *parseVideoInfo) parsePlayerConfig() error {
  119. var json string
  120. p.doc.Find("script").EachWithBreak(func(_ int, s *goquery.Selection) bool {
  121. script := s.Text()
  122. startMatch := regexp.MustCompile("var ytplayer = ytplayer \\|\\| {};\\s*ytplayer\\.config = {")
  123. endMatch := regexp.MustCompile("};\\s*ytplayer.load = function\\(")
  124. startIndices := startMatch.FindStringIndex(script)
  125. if startIndices == nil { return true }
  126. endIndices := endMatch.FindStringIndex(script)
  127. if endIndices == nil { return true }
  128. // minus one to preserve braces
  129. startIndex, endIndex := startIndices[1] - 1, endIndices[0] + 1
  130. if startIndex > endIndex { return true }
  131. json = script[startIndex:endIndex]
  132. // Stop searching, json found
  133. return false
  134. })
  135. // No json found
  136. if json == "" { return playerConfigErr }
  137. // Try decoding json
  138. var parser fastjson.Parser
  139. config, err := parser.Parse(json)
  140. if err != nil { return err }
  141. // Extract data
  142. args := config.Get("args")
  143. if args == nil { return playerConfigErr }
  144. // Get fmt_list string
  145. fmtList := args.GetStringBytes("fmt_list")
  146. if fmtList == nil { return playerConfigErr }
  147. // Split and decode it
  148. fmts := strings.Split(string(fmtList), ",")
  149. for _, fmt := range fmts {
  150. parts := strings.Split(fmt, "/")
  151. if len(parts) != 2 { return playerConfigErr }
  152. formatID := parts[0]
  153. // Look up the format ID
  154. format := data.FormatsById[formatID]
  155. if format == nil { return playerConfigErr }
  156. p.v.Formats = append(p.v.Formats, *format)
  157. }
  158. return nil
  159. }