You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
196 lines
4.9 KiB
196 lines
4.9 KiB
package apiclassic |
|
|
|
import ( |
|
"github.com/PuerkitoBio/goquery" |
|
"errors" |
|
"strconv" |
|
"time" |
|
"github.com/terorie/yt-mango/data" |
|
"regexp" |
|
"github.com/valyala/fastjson" |
|
"strings" |
|
) |
|
|
|
// Selectors passed to goquery's Find when scraping the document.
const (
	// likeBtnSelector locates the element whose text holds the like count.
	likeBtnSelector = ".like-button-renderer-like-button-unclicked"
	// dislikeBtnSelector locates the element whose text holds the dislike count.
	dislikeBtnSelector = ".like-button-renderer-dislike-button-unclicked"
	// viewCountSelector locates the element whose text holds the view count.
	viewCountSelector = "div .watch-view-count"
	// userInfoSelector locates the container searched for uploader details.
	userInfoSelector = "div .yt-user-info"
	// channelNameSelector locates, inside the user info container, the
	// element whose text is used as the uploader name.
	channelNameSelector = ".yt-uix-sessionlink"
)

// playerConfigErr is returned by parsePlayerConfig when the embedded player
// config cannot be located or lacks the fields that method reads.
var playerConfigErr = errors.New("failed to parse player config")
|
|
|
type parseInfo struct { |
|
v *data.Video |
|
doc *goquery.Document |
|
} |
|
|
|
func (p *parseInfo) parse() error { |
|
if err := p.parseLikeDislike(); |
|
err != nil { return err } |
|
if err := p.parseViewCount(); |
|
err != nil { return err } |
|
if err := p.parseUploader(); |
|
err != nil { return err } |
|
if err := p.parseDescription(); |
|
err != nil { return err } |
|
if err := p.parsePlayerConfig(); |
|
err != nil { return err } |
|
if err := p.parseMetas(); |
|
err != nil { return err } |
|
return nil |
|
} |
|
|
|
func (p *parseInfo) parseLikeDislike() error { |
|
likeText := p.doc.Find(likeBtnSelector).First().Text() |
|
dislikeText := p.doc.Find(dislikeBtnSelector).First().Text() |
|
|
|
if len(likeText) == 0 || len(dislikeText) == 0 { |
|
return errors.New("failed to parse like buttons") |
|
} |
|
|
|
var err error |
|
p.v.Likes, err = extractNumber(likeText) |
|
if err != nil { return err } |
|
p.v.Dislikes, err = extractNumber(dislikeText) |
|
if err != nil { return err } |
|
|
|
return nil |
|
} |
|
|
|
func (p *parseInfo) parseViewCount() error { |
|
viewCountText := p.doc.Find(viewCountSelector).First().Text() |
|
viewCount, err := extractNumber(viewCountText) |
|
if err != nil { return err } |
|
p.v.Views = viewCount |
|
return nil |
|
} |
|
|
|
func (p *parseInfo) parseUploader() error { |
|
userInfo := p.doc.Find(userInfoSelector) |
|
userLinkNode := userInfo.Find(".yt-uix-sessionlink") |
|
|
|
// get link |
|
userLink, _ := userLinkNode.Attr("href") |
|
if userLink == "" { return errors.New("couldn't find channel link") } |
|
p.v.UploaderURL = "https://www.youtube.com" + userLink |
|
|
|
// get name |
|
channelName := userInfo.Find(channelNameSelector).Text() |
|
if channelName == "" { return errors.New("could not find channel name") } |
|
p.v.Uploader = channelName |
|
return nil |
|
} |
|
|
|
func (p *parseInfo) parseMetas() error { |
|
metas := p.doc.Find("meta") |
|
// For each <meta> |
|
for _, node := range metas.Nodes { |
|
// Attributes |
|
var content string |
|
var itemprop string |
|
var prop string |
|
|
|
// Parse attributes |
|
for _, attr := range node.Attr { |
|
switch attr.Key { |
|
case "property": prop = attr.Val |
|
case "itemprop": itemprop = attr.Val |
|
case "content": content = attr.Val |
|
} |
|
} |
|
|
|
// Content not set |
|
if len(content) == 0 { continue } |
|
|
|
// <meta property … |
|
if len(prop) != 0 { |
|
switch prop { |
|
case "og:title": |
|
p.v.Title = content |
|
case "og:video:tag": |
|
p.v.Tags = append(p.v.Tags, content) |
|
case "og:url": |
|
p.v.URL = content |
|
case "og:image": |
|
p.v.Thumbnail = content |
|
} |
|
continue |
|
} |
|
// <meta itemprop … |
|
if len(itemprop) != 0 { |
|
switch itemprop { |
|
case "datePublished": |
|
if val, err := time.Parse("2006-01-02", content); |
|
err == nil { p.v.UploadDate = val } |
|
case "genre": |
|
p.v.Genre = content |
|
case "channelId": |
|
p.v.UploaderID = content |
|
case "duration": |
|
if val, err := parseDuration(content); err == nil { |
|
p.v.Duration = val |
|
} else { |
|
return err |
|
} |
|
case "isFamilyFriendly": |
|
if val, err := strconv.ParseBool(content); |
|
err == nil { p.v.FamilyFriendly = val } |
|
} |
|
continue |
|
} |
|
} |
|
return nil |
|
} |
|
|
|
func (p *parseInfo) parsePlayerConfig() error { |
|
var json string |
|
|
|
p.doc.Find("script").EachWithBreak(func(_ int, s *goquery.Selection) bool { |
|
script := s.Text() |
|
startMatch := regexp.MustCompile("var ytplayer = ytplayer \\|\\| {};\\s*ytplayer\\.config = {") |
|
endMatch := regexp.MustCompile("};\\s*ytplayer.load = function\\(") |
|
|
|
startIndices := startMatch.FindStringIndex(script) |
|
if startIndices == nil { return true } |
|
endIndices := endMatch.FindStringIndex(script) |
|
if endIndices == nil { return true } |
|
|
|
// minus one to preserve braces |
|
startIndex, endIndex := startIndices[1] - 1, endIndices[0] + 1 |
|
if startIndex > endIndex { return true } |
|
|
|
json = script[startIndex:endIndex] |
|
|
|
// Stop searching, json found |
|
return false |
|
}) |
|
// No json found |
|
if json == "" { return playerConfigErr } |
|
|
|
// Try decoding json |
|
var parser fastjson.Parser |
|
config, err := parser.Parse(json) |
|
if err != nil { return err } |
|
|
|
// Extract data |
|
args := config.Get("args") |
|
if args == nil { return playerConfigErr } |
|
|
|
// Get fmt_list string |
|
fmtList := args.GetStringBytes("fmt_list") |
|
if fmtList == nil { return playerConfigErr } |
|
|
|
// Split and decode it |
|
fmts := strings.Split(string(fmtList), ",") |
|
for _, fmt := range fmts { |
|
parts := strings.Split(fmt, "/") |
|
if len(parts) != 2 { return playerConfigErr } |
|
formatID := parts[0] |
|
// Look up the format ID |
|
format := data.FormatsById[formatID] |
|
if format == nil { return playerConfigErr } |
|
p.v.Formats = append(p.v.Formats, *format) |
|
} |
|
|
|
return nil |
|
}
|
|
|