Browse Source

Decode video formats

master
terorie 1 year ago
parent
commit
bafc6c4ad3
4 changed files with 140 additions and 44 deletions
  1. 70
    9
      classic/parse.go
  2. 7
    13
      classic/util.go
  3. 61
    20
      data/format.go
  4. 2
    2
      data/video.go

+ 70
- 9
classic/parse.go View File

@@ -6,6 +6,9 @@ import (
"strconv"
"time"
"github.com/terorie/youtube-mango/data"
"regexp"
"github.com/valyala/fastjson"
"strings"
)

const likeBtnSelector = ".like-button-renderer-like-button-unclicked"
@@ -14,6 +17,8 @@ const viewCountSelector = "div .watch-view-count"
const userInfoSelector = "div .yt-user-info"
const channelNameSelector = ".yt-uix-sessionlink"

var playerConfigErr = errors.New("failed to parse player config")

type parseInfo struct {
v *data.Video
doc *goquery.Document
@@ -28,9 +33,10 @@ func (p *parseInfo) parse() error {
err != nil { return err }
if err := p.parseDescription();
err != nil { return err }

p.parseMetas()

if err := p.parsePlayerConfig();
err != nil { return err }
if err := p.parseMetas();
err != nil { return err }
return nil
}

@@ -75,7 +81,7 @@ func (p *parseInfo) parseUploader() error {
return nil
}

func (p *parseInfo) parseMetas() {
func (p *parseInfo) parseMetas() error {
metas := p.doc.Find("meta")
// For each <meta>
for _, node := range metas.Nodes {
@@ -94,9 +100,7 @@ func (p *parseInfo) parseMetas() {
}

// Content not set
if len(content) == 0 {
continue
}
if len(content) == 0 { continue }

// <meta property …
if len(prop) != 0 {
@@ -123,8 +127,11 @@ func (p *parseInfo) parseMetas() {
case "channelId":
p.v.UploaderID = content
case "duration":
if val, err := parseDuration(content);
err == nil { p.v.Duration = val }
if val, err := parseDuration(content); err == nil {
p.v.Duration = val
} else {
return err
}
case "isFamilyFriendly":
if val, err := strconv.ParseBool(content);
err == nil { p.v.FamilyFriendly = val }
@@ -132,4 +139,58 @@ func (p *parseInfo) parseMetas() {
continue
}
}
return nil
}

// Patterns delimiting the object literal assigned to ytplayer.config inside
// an inline script. Compiled once at package scope rather than once per
// <script> element visited.
var (
	playerConfigStartRe = regexp.MustCompile(`var ytplayer = ytplayer \|\| {};\s*ytplayer\.config = {`)
	playerConfigEndRe   = regexp.MustCompile(`};\s*ytplayer\.load = function\(`)
)

// extractPlayerConfig returns the object literal assigned to ytplayer.config
// in script, including its outer braces. ok is false when script does not
// contain the start pattern followed by the end pattern.
func extractPlayerConfig(script string) (config string, ok bool) {
	startLoc := playerConfigStartRe.FindStringIndex(script)
	if startLoc == nil {
		return "", false
	}
	// The start pattern ends on the opening brace; step back one to keep it.
	start := startLoc[1] - 1

	// Search for the terminator only after the opening brace so that an
	// earlier occurrence of the end pattern cannot hide a valid config.
	endLoc := playerConfigEndRe.FindStringIndex(script[start:])
	if endLoc == nil {
		return "", false
	}
	// The end pattern begins on the closing brace; step forward one to keep it.
	end := start + endLoc[0] + 1

	return script[start:end], true
}

// parsePlayerConfig locates the player config in the document's <script>
// elements, parses it as JSON and appends every format listed in
// args.fmt_list to p.v.Formats.
//
// It returns playerConfigErr when no config is found, when "args" or
// "fmt_list" is missing, or when a fmt_list entry is malformed or names an
// unknown format ID. JSON syntax errors from the parser are returned as-is.
// p.v.Formats is modified only when the whole list decodes successfully.
func (p *parseInfo) parsePlayerConfig() error {
	var rawConfig string

	p.doc.Find("script").EachWithBreak(func(_ int, s *goquery.Selection) bool {
		config, ok := extractPlayerConfig(s.Text())
		if !ok {
			// Not the player script, keep looking.
			return true
		}
		rawConfig = config
		// Stop searching, config found.
		return false
	})
	if rawConfig == "" {
		return playerConfigErr
	}

	// Try decoding the config.
	var parser fastjson.Parser
	config, err := parser.Parse(rawConfig)
	if err != nil {
		return err
	}

	args := config.Get("args")
	if args == nil {
		return playerConfigErr
	}

	fmtList := args.GetStringBytes("fmt_list")
	if fmtList == nil {
		return playerConfigErr
	}

	// fmt_list is a comma-separated list; each entry must consist of exactly
	// two "/"-separated parts, the first being the format ID.
	entries := strings.Split(string(fmtList), ",")
	formats := make([]data.Format, 0, len(entries))
	for _, entry := range entries {
		parts := strings.Split(entry, "/")
		if len(parts) != 2 {
			return playerConfigErr
		}
		format := data.FormatsById[parts[0]]
		if format == nil {
			return playerConfigErr
		}
		formats = append(formats, *format)
	}

	// Commit only after every entry resolved, so a failure above does not
	// leave a partially filled format list on the video.
	p.v.Formats = append(p.v.Formats, formats...)
	return nil
}

+ 7
- 13
classic/util.go View File

@@ -1,31 +1,25 @@
package classic

import (
"time"
"errors"
"strings"
"strconv"
)

// "PT6M57S" => 6 min 57 s
func parseDuration(d string) (time.Duration, error) {
var err error
goto start

error:
return 0, errors.New("unknown duration code")
var durationErr = errors.New("unknown duration code")

start:
if d[0:2] != "PT" { goto error }
// "PT6M57S" => 6 min 57 s
func parseDuration(d string) (uint64, error) {
if d[0:2] != "PT" { return 0, durationErr }
mIndex := strings.IndexByte(d, 'M')
if mIndex == -1 { goto error }
if mIndex == -1 { return 0, durationErr }

minutes, err := strconv.ParseUint(d[2:mIndex], 10, 32)
if err != nil { return 0, err }
seconds, err := strconv.ParseUint(d[mIndex:len(d)-1], 10, 32)
seconds, err := strconv.ParseUint(d[mIndex+1:len(d)-1], 10, 32)
if err != nil { return 0, err }

dur := time.Duration(minutes) * time.Minute + time.Duration(seconds) * time.Second
dur := minutes * 60 + seconds
return dur, nil
}


+ 61
- 20
data/format.go View File

@@ -1,5 +1,7 @@
package data

import "encoding/json"

type FormatType uint8

const (
@@ -13,22 +15,45 @@ const (
)

type Format struct {
FormatID string
Extension string
Width uint32
Height uint32
VideoCodec string
AudioCodec string
AudioBitrate uint32
Flags FormatType
ID string `json:"id"`
Extension string `json:"ext"`
Width uint32 `json:"width"`
Height uint32 `json:"height"`
VideoCodec string `json:"vcodec"`
AudioCodec string `json:"acodec"`
AudioBitrate uint32 `json:"abr"`
Flags FormatType `json:"flags"`
}

var FormatsById map[string]*Format

// init fills FormatsById with a pointer to each entry of Formats, keyed by
// format ID. The literal ID list below must match Formats position by
// position; the first mismatch panics with both IDs in the message.
func init() {
	// Expected format IDs, in the same order as the entries of Formats.
	expected := []string{
		"5", "6", "13", "17", "18", "22", "34", "35",
		"36", "37", "38", "43", "44", "45", "46", "59",
		"78", "82", "83", "84", "85", "100", "101", "102",
		"91", "92", "93", "94", "95", "96", "132", "151",
		"133", "134", "135", "136", "137", "138", "160", "212",
		"264", "298", "299", "266", "139", "140", "141", "256",
		"258", "325", "328", "167", "168", "169", "170", "218",
		"219", "278", "242", "243", "244", "245", "246", "247",
		"248", "271", "272", "302", "303", "308", "313", "315",
		"171", "172", "249", "250", "251",
	}

	FormatsById = make(map[string]*Format)
	for pos := 0; pos < len(expected); pos++ {
		want := expected[pos]
		// Point into the Formats slice itself rather than at a copy.
		entry := &Formats[pos]
		if entry.ID != want {
			panic("misaligned IDs: " + want + "/" + entry.ID)
		}
		FormatsById[want] = entry
	}
}

// Taken from github.com/rg3/youtube-dl
// As in youtube_dl/extractor/youtube.py
var Formats = []Format{
// Standard formats
{ "5", "flv", 400, 240, "h263", "mp3", 64, FormatStd },
{ "6", "flv", 450, 270, "h263", "mp3", 64, FormatStd },
{ "5", "flv", 400, 240, "h263", "mp3", 64, FormatStd },
{ "6", "flv", 450, 270, "h263", "mp3", 64, FormatStd },
{ "13", "3gp", 0, 0, "mp4v", "aac", 0, FormatStd },
{ "17", "3gp", 176, 144, "mp4v", "aac", 24, FormatStd },
{ "18", "mp4", 640, 360, "h264", "aac", 96, FormatStd },
@@ -47,21 +72,21 @@ var Formats = []Format{
{ "78", "mp4", 854, 480, "h264", "aac", 128, FormatStd },

// 3D videos
{ "82", "mp4", 0, 360, "h264", "aac", 128, Format3D },
{ "83", "mp4", 0, 480, "h264", "aac", 128, Format3D },
{ "84", "mp4", 0, 720, "h264", "aac", 192, Format3D },
{ "85", "mp4", 0, 1080, "h264", "aac", 192, Format3D },
{ "82", "mp4", 0, 360, "h264", "aac", 128, Format3D },
{ "83", "mp4", 0, 480, "h264", "aac", 128, Format3D },
{ "84", "mp4", 0, 720, "h264", "aac", 192, Format3D },
{ "85", "mp4", 0, 1080, "h264", "aac", 192, Format3D },
{ "100", "webm", 0, 360, "vp8", "vorbis", 128, Format3D },
{ "101", "webm", 0, 480, "vp8", "vorbis", 192, Format3D },
{ "102", "webm", 0, 720, "vp8", "vorbis", 192, Format3D },

// Apple HTTP Live Streaming
{ "91", "mp4", 0, 144, "h264", "aac", 48, FormatHLS },
{ "92", "mp4", 0, 240, "h264", "aac", 48, FormatHLS },
{ "93", "mp4", 0, 360, "h264", "aac", 128, FormatHLS },
{ "94", "mp4", 0, 480, "h264", "aac", 128, FormatHLS },
{ "95", "mp4", 0, 720, "h264", "aac", 256, FormatHLS },
{ "96", "mp4", 0, 1080, "h264", "aac", 256, FormatHLS },
{ "91", "mp4", 0, 144, "h264", "aac", 48, FormatHLS },
{ "92", "mp4", 0, 240, "h264", "aac", 48, FormatHLS },
{ "93", "mp4", 0, 360, "h264", "aac", 128, FormatHLS },
{ "94", "mp4", 0, 480, "h264", "aac", 128, FormatHLS },
{ "95", "mp4", 0, 720, "h264", "aac", 256, FormatHLS },
{ "96", "mp4", 0, 1080, "h264", "aac", 256, FormatHLS },
{ "132", "mp4", 0, 240, "h264", "aac", 48, FormatHLS },
{ "151", "mp4", 0, 72, "h264", "aac", 24, FormatHLS },

@@ -121,3 +146,19 @@ var Formats = []Format{
{ "250", "webm", 0, 0, "", "opus", 70, FormatDASH | FormatAudioOnly },
{ "251", "webm", 0, 0, "", "opus", 160, FormatDASH | FormatAudioOnly },
}

// MarshalJSON encodes f as a JSON array holding the name of every flag bit
// that is set, in a fixed order. A value with none of the listed bits set
// encodes as an empty array.
func (f FormatType) MarshalJSON() ([]byte, error) {
	// Flag bits and their JSON names, in output order.
	table := [...]struct {
		bit  FormatType
		name string
	}{
		{FormatVideoOnly, "videoOnly"},
		{FormatAudioOnly, "audioOnly"},
		{Format3D, "3d"},
		{FormatHLS, "hls"},
		{FormatDASH, "dash"},
		{FormatHighFps, "hiFps"},
	}

	// Start from a non-nil slice so that "no flags" marshals as [] and not null.
	names := []string{}
	for _, entry := range table {
		if f&entry.bit == 0 {
			continue
		}
		names = append(names, entry.name)
	}
	return json.Marshal(names)
}

+ 2
- 2
data/video.go View File

@@ -14,9 +14,9 @@ type Video struct {
URL string `json:"url"`
License string `json:"license,omitempty"`
Genre string `json:"genre"`
Tags []string `json:"tags"`
Tags []string `json:"tags,omitempty"`
Subtitles []string `json:"subtitles,omitempty"`
Duration time.Duration `json:"duration"`
Duration uint64 `json:"duration"`
FamilyFriendly bool `json:"family_friendly"`
Views uint64 `json:"views"`
Likes uint64 `json:"likes"`

Loading…
Cancel
Save