Browse Source

Cobra & channel video dumper

master
terorie 1 year ago
parent
commit
b6a0e41840
13 changed files with 354 additions and 30 deletions
  1. 3
    1
      README.md
  2. 9
    0
      browseajax/get.go
  3. 45
    0
      browseajax/grab.go
  4. 60
    0
      browseajax/parse.go
  5. 75
    0
      browseajax/token.go
  6. 2
    19
      classic/grab.go
  7. 13
    1
      common/http.go
  8. 13
    0
      controller/control.go
  9. 25
    9
      main.go
  10. 78
    0
      pretty/ansi.go
  11. 16
    0
      pretty/istty.go
  12. 5
    0
      version/get.go
  13. 10
    0
      work.go

+ 3
- 1
README.md View File

@@ -10,8 +10,10 @@ If you don't have a Go toolchain, grab an executable from the Releases tab

##### Project structure

- _/common_: commonly used HTTP code
- _/controller_: Manages workers (sends tasks, gets results, …)
- _/common_: Commonly used HTTP code
- _/data_: Data structures
- _/db_: MongoDB connection
- _/classic_: Extractor calling the HTML `/watch` API
- _/watchapi_: Extractor calling the JSON `/watch` API


+ 9
- 0
browseajax/get.go View File

@@ -0,0 +1,9 @@
package browseajax

// GetPage downloads one page of a channel's video listing via GrabPage
// and hands the decoded JSON to ParsePage. It returns the first error
// reported by either step, or nil when both succeed.
func GetPage(channelID string, page uint) error {
	root, err := GrabPage(channelID, page)
	if err != nil {
		return err
	}
	return ParsePage(root)
}

+ 45
- 0
browseajax/grab.go View File

@@ -0,0 +1,45 @@
package browseajax

import (
"net/http"
"github.com/terorie/yt-mango/common"
"errors"
"io/ioutil"
"github.com/valyala/fastjson"
)

const mainURL = "https://www.youtube.com/browse_ajax?ctoken="

// GrabPage requests one page of a channel's video listing from the
// browse_ajax endpoint and returns the parsed JSON root.
//
// The continuation token is built from channelID and page with
// GenerateToken. Any status other than 200 yields an "HTTP failure"
// error; for status 500 the response body is additionally dumped with
// println as a debugging aid. Transport, read and JSON errors are
// returned unchanged.
func GrabPage(channelID string, page uint) (*fastjson.Value, error) {
	// Generate page URL
	token := GenerateToken(channelID, uint64(page))
	url := mainURL + token

	// Prepare request
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Add("X-YouTube-Client-Name", "1")
	req.Header.Add("X-YouTube-Client-Version", "2.20180726")

	// Send request
	res, err := common.Client.Do(req)
	if err != nil {
		return nil, err
	}
	// Close the body on every return path. Previously it was only closed
	// for status 200 and 500, leaking the connection on any other status.
	defer res.Body.Close()

	if res.StatusCode == 500 {
		// Best-effort dump of the server's error page; a read error here
		// is deliberately ignored because the request has failed anyway.
		buf, _ := ioutil.ReadAll(res.Body)
		println(string(buf))
	}
	if res.StatusCode != 200 {
		return nil, errors.New("HTTP failure")
	}

	// Download response
	buf, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return nil, err
	}

	// Parse JSON
	var p fastjson.Parser
	root, err := p.ParseBytes(buf)
	if err != nil {
		return nil, err
	}

	return root, nil
}

+ 60
- 0
browseajax/parse.go View File

@@ -0,0 +1,60 @@
package browseajax

import (
"github.com/valyala/fastjson"
"errors"
)

// missingData is returned by ParsePage when an expected field is absent
// from the response JSON.
var missingData = errors.New("missing data")

// ParsePage walks a decoded browse_ajax response and prints the watch URL
// of every grid item with println.
//
// rootObj must be a JSON array; the first element that has a "response"
// key is used. missingData is returned when no such element exists, when
// the grid item list is absent, or when an item has no URL. Type errors
// reported by fastjson are returned unchanged.
func ParsePage(rootObj *fastjson.Value) error {
	entries, err := rootObj.Array()
	if err != nil {
		return err
	}

	// Locate the first entry carrying the response payload.
	var response *fastjson.Value
	for i := range entries {
		if entries[i].Exists("response") {
			response = entries[i]
			break
		}
	}
	if response == nil {
		return missingData
	}

	// TODO: inspect the error object of the response.

	// Path from the container down to the grid's item list.
	gridPath := []string{
		"response",
		"continuationContents",
		"gridContinuation",
		"items",
	}
	grid := response.Get(gridPath...)
	if grid == nil {
		return missingData
	}
	videos, err := grid.Array()
	if err != nil {
		return err
	}

	// Path from a grid item down to its watch URL.
	urlPath := []string{
		"gridVideoRenderer",
		"navigationEndpoint",
		"commandMetadata",
		"webCommandMetadata",
		"url",
	}
	for _, video := range videos {
		link := video.Get(urlPath...)
		if link == nil {
			return missingData
		}
		raw, err := link.StringBytes()
		if err != nil {
			return err
		}
		println(string(raw))
	}
	return nil
}

+ 75
- 0
browseajax/token.go View File

@@ -0,0 +1,75 @@
package browseajax

import (
"bytes"
"strconv"
"encoding/base64"
)

func GenerateToken(channelId string, page uint64) string {
// Generate the inner token
token := genInnerToken(page)

// Build the inner object
var inner bytes.Buffer

// channelId
inner.WriteByte(0x12) // type
writeVarint(&inner, uint64(len(channelId))) // len
inner.WriteString(channelId) // data

// token
inner.WriteByte(0x1a) // type
writeVarint(&inner, uint64(len(token))) // len
inner.WriteString(token) // data

innerBytes := inner.Bytes()

var root bytes.Buffer

// innerBytes
root.Write([]byte{0xe2, 0xa9, 0x85, 0xb2, 0x02}) // probably types
writeVarint(&root, uint64(len(innerBytes)))
root.Write(innerBytes)

rootBytes := root.Bytes()

return base64.URLEncoding.EncodeToString(rootBytes)
}

// genInnerToken builds the inner part of a continuation token for the
// given page number and returns it URL-safe base64 encoded.
//
// The payload is a fixed header naming the "videos" tab, a block of
// constant bytes, the page number as a size-prefixed decimal string, and
// a fixed trailer. The byte layout is probably protobuf.
func genInnerToken(page uint64) string {
	var (
		header  = []byte{0x12, 0x06}
		flags   = []byte{0x20, 0x00, 0x30, 0x01, 0x38, 0x01, 0x60, 0x01, 0x6a, 0x00, 0x7a}
		trailer = []byte{0xb8, 0x01, 0x00}
	)
	digits := strconv.FormatUint(page, 10)

	var payload bytes.Buffer
	payload.Write(header)
	payload.WriteString("videos")
	payload.Write(flags)

	// Size-prefixed page string
	writeVarint(&payload, uint64(len(digits)))
	payload.WriteString(digits)

	payload.Write(trailer)

	return base64.URLEncoding.EncodeToString(payload.Bytes())
}

// writeVarint appends n to buf as a base-128 varint: seven payload bits
// per byte, least-significant group first, with the high bit set on every
// byte except the last. A uint64 needs at most ten bytes.
func writeVarint(buf *bytes.Buffer, n uint64) {
	var scratch [10]byte
	size := 0
	// Emit continuation bytes while more than seven bits remain.
	for n >= 0x80 {
		scratch[size] = byte(n) | 0x80
		n >>= 7
		size++
	}
	// Final byte carries the remaining bits with the high bit clear.
	scratch[size] = byte(n)
	buf.Write(scratch[:size+1])
}

+ 2
- 19
classic/grab.go View File

@@ -4,17 +4,11 @@ import (
"net/http"
"errors"
"encoding/xml"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/terorie/yt-mango/data"
"github.com/terorie/yt-mango/common"
)

// transport configures connection reuse for this package's HTTP client:
// at most 10 idle connections are kept, each for up to 30 seconds.
var transport = http.Transport{
MaxIdleConns: 10,
IdleConnTimeout: 30 * time.Second,
}
// client is the package-level HTTP client built on transport.
var client = http.Client{Transport: &transport}

const mainURL = "https://www.youtube.com/watch?has_verified=1&bpctr=6969696969&v="
const subtitleURL = "https://video.google.com/timedtext?type=list&v="

@@ -24,7 +18,7 @@ func grab(v *data.Video) (doc *goquery.Document, err error) {
if err != nil { return }
requestHeader(&req.Header)

res, err := client.Do(req)
res, err := common.Client.Do(req)
if err != nil { return }
if res.StatusCode != 200 { return nil, errors.New("HTTP failure") }

@@ -38,12 +32,9 @@ func grab(v *data.Video) (doc *goquery.Document, err error) {
// Grabs and parses a subtitle list
func grabSubtitleList(v *data.Video) (err error) {
req, err := http.NewRequest("GET", subtitleURL + v.ID, nil)

if err != nil { return err }
requestHeader(&req.Header)

res, err := client.Do(req)

if err != nil { return err }
if res.StatusCode != 200 { return errors.New("HTTP failure") }

@@ -60,11 +51,3 @@ func grabSubtitleList(v *data.Video) (err error) {

return
}

// requestHeader appends the headers every outgoing request must carry:
//  - "Accept-Language: en-US", or else the parser might break
//  - "User-Agent: youtube-mango/0.1"
// Values are added, so headers already present in h are kept.
func requestHeader(h *http.Header) {
	defaults := [...][2]string{
		{"Accept-Language", "en-US"},
		{"User-Agent", "youtube-mango/0.1"},
	}
	for _, kv := range defaults {
		h.Add(kv[0], kv[1])
	}
}

+ 13
- 1
common/http.go View File

@@ -2,4 +2,16 @@ package common

import "net/http"

var Client = http.Client{Transport: http.DefaultTransport}
// transport is an http.RoundTripper that attaches the project's default
// headers to every request before passing it to http.DefaultTransport.
type transport struct{}

// RoundTrip sends r through http.DefaultTransport with two extra headers:
//  - "Accept-Language: en-US", or else the parser might break
//  - "User-Agent: youtube-mango/0.1"
//
// The headers are added to a copy of the request. The http.RoundTripper
// contract forbids modifying the caller's request, and mutating it in
// place would append the same values again each time a request is reused
// or its headers are carried over to a redirect.
func (t transport) RoundTrip(r *http.Request) (*http.Response, error) {
	// Shallow-copy the request, then give the copy its own header map.
	clone := new(http.Request)
	*clone = *r
	clone.Header = make(http.Header, len(r.Header)+2)
	for name, values := range r.Header {
		clone.Header[name] = append([]string(nil), values...)
	}

	clone.Header.Add("Accept-Language", "en-US")
	clone.Header.Add("User-Agent", "youtube-mango/0.1")
	return http.DefaultTransport.RoundTrip(clone)
}

// Client is the shared HTTP client; its transport adds the default
// Accept-Language and User-Agent headers to every request.
// NOTE(review): no Timeout is configured, so a stalled server can block a
// request indefinitely — consider setting one.
var Client = http.Client{Transport: transport{}}

+ 13
- 0
controller/control.go View File

@@ -0,0 +1,13 @@
package controller

// Controller is a placeholder type with no fields yet.
// NOTE(review): per the README, the controller package is meant to manage
// workers (send tasks, collect results) — none of that is implemented here.
type Controller struct {

}

// NewController is currently an empty stub.
// NOTE(review): it is declared as a method on *Controller rather than as a
// constructor function — confirm this is intended.
func (c *Controller) NewController() {

}

// Schedule is currently an empty stub.
func (c *Controller) Schedule() {

}

+ 25
- 9
main.go View File

@@ -5,19 +5,35 @@
package main

import (
"encoding/json"
"github.com/terorie/yt-mango/data"
"github.com/terorie/yt-mango/classic"
"github.com/spf13/cobra"
"fmt"
"os"
)

// Version is the version string printed by the "version" subcommand.
const Version = "v0.1 -- dev"

// printVersion is the Run handler of the "version" subcommand. It writes
// the program name followed by Version to standard output; both arguments
// are ignored.
func printVersion(_ *cobra.Command, _ []string) {
	fmt.Println("YT-Mango archiver " + Version)
}

// main assembles the yt-mango root command, attaches the "version"
// subcommand and executes it. An execution error is written to stderr and
// the process exits with status 1.
// NOTE(review): this span appears to interleave removed and added lines of
// a diff (the data.Video / classic.Get / json.MarshalIndent statements look
// like the pre-change body) — confirm against the real file.
func main() {
v := data.Video{ID: "kj9mFK62c6E"}
rootCmd := cobra.Command{
Use: "yt-mango",
Short: "YT-Mango is a scalable video metadata archiver",
Long: "YT-Mango is a scalable video metadata archiving utility\n" +
"written by terorie with help from the-eye.eu",
}

err := classic.Get(&v)
if err != nil { panic(err) }
versionCmd := cobra.Command{
Use: "version",
Short: "Get the version number of yt-mango",
Run: printVersion,
}

jsn, err := json.MarshalIndent(v, "", "\t")
if err != nil { panic(err) }
rootCmd.AddCommand(&versionCmd)

println(string(jsn))
// Report the failure on stderr and signal it through the exit status.
if err := rootCmd.Execute(); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
}

+ 78
- 0
pretty/ansi.go View File

@@ -0,0 +1,78 @@
package pretty

import (
"bytes"
)

// Code is a single terminal style parameter, stored as its decimal string
// (e.g. "31"). It is inserted between "ESC[" and "m" when applied.
type Code string
// Codes is a list of Code values that are applied together.
type Codes []Code


// Effect decorates a string for display: E returns the styled form of its
// argument.
type Effect interface {
E(string) string
}

// nilEffect is the no-op Effect.
type nilEffect struct{}

// E returns x exactly as given.
func (nilEffect) E(x string) string {
	return x
}

// customEffect adapts a plain func(string) string to the Effect interface.
type customEffect func(string) string
// E calls the underlying function with x and returns its result.
func (e customEffect) E(x string) string { return e(x) }

// Style parameters usable as Code values. Each is the decimal number that
// Code.E / Codes.E place inside an "ESC[...m" sequence.
const (
// Text attributes
RESET = Code("0")
BOLD = Code("1")
DIM = Code("2")
ITALIC = Code("3")
UNDERL = Code("4")
INV = Code("7")
HIDDEN = Code("8")
STRIKE = Code("9")
// Foreground colours (30-37)
BLACK = Code("30")
RED = Code("31")
GREEN = Code("32")
YELLOW = Code("33")
BLUE = Code("34")
MGNTA = Code("35")
CYAN = Code("36")
WHITE = Code("37")
// "H"-prefixed foreground colours (90-97), presumably the
// high-intensity variants
HBLACK = Code("90")
HRED = Code("91")
HGREEN = Code("92")
HYELLOW = Code("93")
HBLUE = Code("94")
HMGNTA = Code("95")
HCYAN = Code("96")
HWHITE = Code("97")
)

// Add bundles the given codes into a Codes value so they can be applied
// together. The result shares its backing array with the argument slice.
func Add(x ...Code) Codes {
	var combined Codes = x
	return combined
}

// E wraps x in the escape sequence for this code followed by a reset
// ("ESC[0m"). When output is not a terminal, x is returned unchanged.
func (c Code) E(x string) string {
	if isTTY {
		return "\x1b[" + string(c) + "m" + x + "\x1b[0m"
	}
	return x
}

// E wraps x in a single escape sequence that applies every code in cs,
// followed by a reset ("ESC[0m"). Codes are joined with ';', e.g.
// Codes{BOLD, RED} yields "ESC[1;31m".
//
// x is returned unchanged when output is not a terminal or when cs is
// empty. The separator is written only between codes: a leading ';'
// creates an empty first parameter, which terminals treat as 0 (reset).
func (cs Codes) E(x string) string {
	if !isTTY || len(cs) == 0 {
		return x
	}
	var b bytes.Buffer
	b.WriteString("\x1b[")
	for i, c := range cs {
		if i > 0 {
			b.WriteByte(';')
		}
		b.WriteString(string(c))
	}
	b.WriteByte('m')
	b.WriteString(x)
	b.WriteString("\x1b[0m")
	return b.String()
}

// Wrap returns an Effect that surrounds a string with the first two
// characters of wrapper (e.g. "[]"), each styled with e. The wrapped
// string itself is left unstyled.
//
// wrapper is split by rune, so multi-byte delimiters such as "«»" work.
// A side with no character is omitted, so a wrapper shorter than two
// characters no longer panics. When output is not a terminal, the
// returned Effect leaves its input unchanged and adds no delimiters.
func Wrap(e Effect, wrapper string) Effect {
	if !isTTY {
		return nilEffect{}
	}

	var left, right string
	delims := []rune(wrapper)
	if len(delims) > 0 {
		left = string(delims[0])
	}
	if len(delims) > 1 {
		right = string(delims[1])
	}

	return customEffect(func(s string) string {
		out := s
		if left != "" {
			out = e.E(left) + out
		}
		if right != "" {
			out += e.E(right)
		}
		return out
	})
}

+ 16
- 0
pretty/istty.go View File

@@ -0,0 +1,16 @@
package pretty

import (
"os"
"strings"
"github.com/mattn/go-isatty"
)

// isTTY reports whether styled output is enabled; it is set once in init
// and checked by every Effect in this package.
var isTTY bool

// init decides whether styled output is enabled. It is turned on when the
// TERM environment variable starts with "xterm", or otherwise when
// standard output is a terminal.
func init() {
	if strings.HasPrefix(os.Getenv("TERM"), "xterm") {
		isTTY = true
		return
	}
	isTTY = isatty.IsTerminal(os.Stdout.Fd())
}

+ 5
- 0
version/get.go View File

@@ -0,0 +1,5 @@
package version

// Get returns the version string of the program.
func Get() string {
	const current = "v0.1 -- dev"
	return current
}

+ 10
- 0
work.go View File

@@ -0,0 +1,10 @@
package main

import "github.com/spf13/cobra"

// workCmd defines the "work" subcommand, described as pulling work from a
// Redis queue and uploading extracted metadata to a Mongo database.
// NOTE(review): no Run handler is set here, and the visible main function
// does not register this command — presumably still work in progress.
var workCmd = cobra.Command{
Use: "work",
Short: "Connect to a queue and start archiving",
Long: "Get work from a Redis queue, start extracting metadata\n" +
"and upload it to a Mongo database.",
}

Loading…
Cancel
Save