Refactor API selection

master
terorie committed 1 year ago
commit c3b6047f98

+ 5
- 1
.gitignore

@@ -1,5 +1,9 @@
# IntelliJ
/idea/
/.idea/

# Apple
.DS_STORE

# Executables
/yt-mango
/yt-mango.exe

+ 23
- 0
api/api.go

@@ -0,0 +1,23 @@
package api

import (
"github.com/terorie/yt-mango/data"
"github.com/terorie/yt-mango/classic"
"github.com/terorie/yt-mango/apiclassic"
)

type API struct {
GetVideo func(*data.Video) error
GetChannel func(*data.Channel) error
GetChannelVideoURLs func(channelID string, page uint) ([]string, error)
}

var ClassicAPI = API{
GetVideo: apiclassic.GetVideo,
GetChannel: apiclassic.GetChannel,
GetChannelVideoURLs: apiclassic.GetChannelVideoURLs,
}

var JsonAPI struct {

}
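
The JsonAPI variable above is left as an empty placeholder in this commit. A minimal sketch of how it could be wired up once the apijson backend is complete, mirroring ClassicAPI (the wiring and the selectAPI helper are assumptions, not part of this commit):

// In package api, assuming "github.com/terorie/yt-mango/apijson" is imported.
// Sketch only: it relies on apijson.GetVideo, apijson.GetChannel and
// apijson.GetChannelVideoURLs keeping the signatures added later in this commit.
var JsonAPI = API{
	GetVideo:            apijson.GetVideo,
	GetChannel:          apijson.GetChannel,
	GetChannelVideoURLs: apijson.GetChannelVideoURLs,
}

// selectAPI is a hypothetical helper showing how a caller could pick a backend.
func selectAPI(useJSON bool) *API {
	if useJSON {
		return &JsonAPI
	}
	return &ClassicAPI
}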

classic/data.go → apiclassic/data.go

@@ -1,4 +1,4 @@
package classic
package apiclassic

type XMLSubTrackList struct {
Tracks []struct {

classic/get.go → apiclassic/get.go

@@ -1,11 +1,11 @@
package classic
package apiclassic

import (
"github.com/terorie/yt-mango/data"
"errors"
)

func Get(v *data.Video) error {
func GetVideo(v *data.Video) error {
if len(v.ID) == 0 { return errors.New("no video ID") }

// Download the doc tree
@@ -19,3 +19,11 @@ func Get(v *data.Video) error {

return nil
}

func GetChannel(c *data.Channel) error {
return errors.New("not implemented")
}

func GetChannelVideoURLs(channelID string, page uint) ([]string, error) {
return nil, errors.New("not implemented")
}

classic/grab.go → apiclassic/grab.go

@@ -1,4 +1,4 @@
package classic
package apiclassic

import (
"net/http"

classic/parse.go → apiclassic/parse.go

@@ -1,4 +1,4 @@
package classic
package apiclassic

import (
"github.com/PuerkitoBio/goquery"

classic/parsedescription.go → apiclassic/parsedescription.go

@@ -1,4 +1,4 @@
package classic
package apiclassic

import (
"errors"

classic/util.go → apiclassic/util.go

@@ -1,4 +1,4 @@
package classic
package apiclassic

import (
"errors"

+ 25
- 0
apijson/get.go

@@ -0,0 +1,25 @@
package apijson

import (
"github.com/terorie/yt-mango/data"
"errors"
)

func GetVideo(v *data.Video) (err error) {
jsn, err := GrabVideo(v)
if err != nil { return }
err = ParseVideo(v, jsn)
if err != nil { return }
return
}

func GetChannel(c *data.Channel) error {
return errors.New("not implemented")
}

func GetChannelVideoURLs(channelID string, page uint) (urls []string, err error) {
jsn, err := GrabChannelPage(channelID, page)
if err != nil { return }
urls, err = ParseChannelPageLinks(jsn)
return
}
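
A short usage sketch of the entry points above; the video and channel IDs are placeholders, and the caller is assumed to import apijson, data, fmt and log:

// Fetch metadata for one video via the JSON (?pbj=1) backend.
v := data.Video{ID: "dQw4w9WgXcQ"} // placeholder video ID
if err := apijson.GetVideo(&v); err != nil {
	log.Fatal(err)
}
fmt.Println(v.Title, v.Tags)

// List the video URLs on the first page of a channel's uploads.
urls, err := apijson.GetChannelVideoURLs("UC_x5XG1OV2P6uZZ5FSM9Ttw", 1) // placeholder channel ID
if err != nil {
	log.Fatal(err)
}
fmt.Println(len(urls), "video URLs on page 1")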

+ 72
- 0
apijson/grab.go

@@ -0,0 +1,72 @@
package apijson

import (
"github.com/terorie/yt-mango/data"
"net/http"
"github.com/terorie/yt-mango/common"
"github.com/valyala/fastjson"
"io/ioutil"
"errors"
)

const videoURL = "https://www.youtube.com/watch?pbj=1&v="
const channelURL = "https://www.youtube.com/browse_ajax?ctoken="

func GrabVideo(v *data.Video) (root *fastjson.Value, err error) {
// Prepare request
req, err := http.NewRequest("GET", videoURL+ v.ID, nil)
if err != nil { return nil, err }
setHeaders(&req.Header)

// Send request
res, err := common.Client.Do(req)
if err != nil { return }
// Close the response body when done (leaving it open would leak the connection)
defer res.Body.Close()

// Download response
body, err := ioutil.ReadAll(res.Body)
if err != nil { return }

// Parse JSON
var p fastjson.Parser
root, err = p.ParseBytes(body)
if err != nil { return }

return
}

func GrabChannelPage(channelID string, page uint) (root *fastjson.Value, err error) {
// Generate page URL
token := GenChannelPageToken(channelID, uint64(page))
url := channelURL + token

// Prepare request
req, err := http.NewRequest("GET", url, nil)
if err != nil { return nil, err }
setHeaders(&req.Header)

// Send request
res, err := common.Client.Do(req)
if err != nil { return nil, err }
// Always close the body, whatever the status code
defer res.Body.Close()

// Dump the body of server errors to help debugging, then fail on any non-200 status
if res.StatusCode == 500 {
buf, _ := ioutil.ReadAll(res.Body)
println(string(buf))
}
if res.StatusCode != 200 { return nil, errors.New("HTTP failure") }

// Download response
buf, err := ioutil.ReadAll(res.Body)
if err != nil { return nil, err }

// Parse JSON
var p fastjson.Parser
root, err = p.ParseBytes(buf)
return
}

func setHeaders(h *http.Header) {
h.Add("Host", "www.youtube.com")
h.Add("X-YouTube-Client-Name", "1")
h.Add("X-YouTube-Client-Version", "2.20170707")
}

browseajax/parse.go → apijson/parsechannel.go

@@ -1,4 +1,4 @@
package browseajax
package apijson

import (
"github.com/valyala/fastjson"
@@ -9,7 +9,7 @@ import (
var MissingData = errors.New("missing data")
var ServerError = errors.New("server error")

func ParsePage(rootObj *fastjson.Value) ([]string, error) {
func ParseChannelPageLinks(rootObj *fastjson.Value) ([]string, error) {
// Root as array
root, err := rootObj.Array()
if err != nil { return nil, err }
@@ -22,8 +22,7 @@ func ParsePage(rootObj *fastjson.Value) ([]string, error) {
break
}
}
if container == nil { return nil, MissingData
}
if container == nil { return nil, MissingData }

// Get error obj
errorExists := container.Exists(
@@ -32,8 +31,7 @@ func ParsePage(rootObj *fastjson.Value) ([]string, error) {
"errors",
"error",
)
if errorExists { return nil, ServerError
}
if errorExists { return nil, ServerError }

// Get items from grid
itemsObj := container.Get(
@@ -42,8 +40,7 @@ func ParsePage(rootObj *fastjson.Value) ([]string, error) {
"gridContinuation",
"items",
)
if itemsObj == nil { return nil, MissingData
}
if itemsObj == nil { return nil, MissingData }

// Items as array
items, err := itemsObj.Array()
@@ -61,8 +58,7 @@ func ParsePage(rootObj *fastjson.Value) ([]string, error) {
"webCommandMetadata",
"url",
)
if urlObj == nil { return nil, MissingData
}
if urlObj == nil { return nil, MissingData }

// URL as string
urlBytes, err := urlObj.StringBytes()

+ 36
- 0
apijson/parsevideo.go

@@ -0,0 +1,36 @@
package apijson

import (
"github.com/valyala/fastjson"
"github.com/terorie/yt-mango/data"
"errors"
)

var missingData = errors.New("missing data")
var unexpectedType = errors.New("unexpected type")

func ParseVideo(v *data.Video, root *fastjson.Value) error {
rootArray := root.GetArray()
if rootArray == nil { return unexpectedType }

var videoDetails *fastjson.Value
for _, sub := range rootArray {
videoDetails = sub.Get("page", "playerResponse", "videoDetails")
if videoDetails != nil { break }
}
// Bail out if none of the array elements contained videoDetails
if videoDetails == nil { return missingData }

keywords := videoDetails.GetArray("keywords")
if keywords == nil { return missingData }
for _, keywordValue := range keywords {
keywordBytes, _ := keywordValue.StringBytes()
if keywordBytes == nil { continue }

keyword := string(keywordBytes)
v.Tags = append(v.Tags, keyword)
}

titleBytes := videoDetails.GetStringBytes("title")
if titleBytes != nil { v.Title = string(titleBytes) }

return nil
}
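
For context, a minimal fixture in the shape ParseVideo expects: only the page → playerResponse → videoDetails path read above is present, and the sample values are invented:

// Hypothetical test helper inside package apijson; the JSON layout mirrors the
// Get("page", "playerResponse", "videoDetails") lookup in ParseVideo.
const sampleResponse = `[
	{"page": {"playerResponse": {"videoDetails": {
		"title": "Example video",
		"keywords": ["go", "youtube"]
	}}}}
]`

func exampleParseVideo() (data.Video, error) {
	var p fastjson.Parser
	root, err := p.Parse(sampleResponse)
	if err != nil {
		return data.Video{}, err
	}
	var v data.Video
	err = ParseVideo(&v, root)
	// Expected result: v.Title == "Example video", v.Tags == ["go", "youtube"]
	return v, err
}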

browseajax/token.go → apijson/token.go

@@ -1,4 +1,4 @@
package browseajax
package apijson

import (
"bytes"
@@ -6,7 +6,7 @@ import (
"encoding/base64"
)

func GenerateToken(channelId string, page uint64) string {
func GenChannelPageToken(channelId string, page uint64) string {
// Generate the inner token
token := genInnerToken(page)
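
Usage sketch for the renamed token function; the channel ID is a placeholder, and channelURL is the constant defined in apijson/grab.go above:

// Inside package apijson: build the browse_ajax URL for page 2 of a channel's uploads.
token := GenChannelPageToken("UC_x5XG1OV2P6uZZ5FSM9Ttw", 2) // placeholder channel ID
pageURL := channelURL + token
// pageURL now looks like "https://www.youtube.com/browse_ajax?ctoken=<base64 token>"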


+ 0
- 9
browseajax/get.go

@@ -1,9 +0,0 @@
package browseajax

func GetPage(channelID string, page uint) ([]string, error) {
root, err := GrabPage(channelID, page)
if err != nil { return nil, err }
urls, err := ParsePage(root)
if err != nil { return nil, err }
return urls, nil
}

+ 0
- 45
browseajax/grab.go

@@ -1,45 +0,0 @@
package browseajax

import (
"net/http"
"github.com/terorie/yt-mango/common"
"errors"
"io/ioutil"
"github.com/valyala/fastjson"
)

const mainURL = "https://www.youtube.com/browse_ajax?ctoken="

func GrabPage(channelID string, page uint) (*fastjson.Value, error) {
// Generate page URL
token := GenerateToken(channelID, uint64(page))
url := mainURL + token

// Prepare request
req, err := http.NewRequest("GET", url, nil)
if err != nil { return nil, err }
req.Header.Add("X-YouTube-Client-Name", "1")
req.Header.Add("X-YouTube-Client-Version", "2.20180726")

// Send request
res, err := common.Client.Do(req)
if err != nil { return nil, err }
if res.StatusCode == 500 {
defer res.Body.Close()
buf, _ := ioutil.ReadAll(res.Body)
println(string(buf))
}
if res.StatusCode != 200 { return nil, errors.New("HTTP failure") }

// Download response
defer res.Body.Close()
buf, err := ioutil.ReadAll(res.Body)
if err != nil { return nil, err }

// Parse JSON
var p fastjson.Parser
root, err := p.ParseBytes(buf)
if err != nil { return nil, err }

return root, nil
}

+ 22
- 0
cmd/channel.go

@@ -0,0 +1,22 @@
package cmd

import (
"github.com/spf13/cobra"
"regexp"
)

var force bool
var offset uint32

var Channel = cobra.Command{
Use: "channel",
Short: "Get information about a channel",
}

var matchChannelID = regexp.MustCompile("^([\\w\\-]|(%3[dD]))+$")

func init() {
channelDumpCmd.Flags().BoolVarP(&force, "force", "f", false, "Overwrite the output file if it already exists")
channelDumpCmd.Flags().Uint32Var(&offset, "page-offset", 1, "Start getting videos at this page. (A page is usually 30 videos)")
Channel.AddCommand(&channelDumpCmd)
}
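
A quick illustration of what the matchChannelID pattern above accepts; the IDs are placeholders:

// Plain channel IDs (word characters and "-") pass, as does a URL-encoded "=".
matchChannelID.MatchString("UC_x5XG1OV2P6uZZ5FSM9Ttw")          // true
matchChannelID.MatchString("UC_x5XG1OV2P6uZZ5FSM9Ttw%3D")       // true
matchChannelID.MatchString("https://www.youtube.com/channel/x") // false: ":" and "/" are rejected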

channel.go → cmd/channeldump.go

@@ -1,28 +1,17 @@
package main
package cmd

import (
"github.com/spf13/cobra"
"github.com/terorie/yt-mango/browseajax"
"regexp"
"net/url"
"fmt"
"os"
"net/url"
"strings"
"log"
"time"
"bufio"
"log"
"github.com/terorie/yt-mango/apijson"
)

var force bool
var offset uint32

var channelCmd = cobra.Command{
Use: "channel",
Short: "Get information about a channel",
}

var matchChannelID = regexp.MustCompile("^([\\w\\-]|(%3[dD]))+$")

var channelDumpCmd = cobra.Command{
Use: "dumpurls <channel ID> <file>",
Short: "Get all public video URLs from channel",
@@ -85,7 +74,7 @@ var channelDumpCmd = cobra.Command{

totalURLs := 0
for i := offset; true; i++ {
channelURLs, err := browseajax.GetPage(channelID, uint(i))
channelURLs, err := apijson.GetChannelVideoURLs(channelID, uint(i))
if err != nil {
log.Printf("Aborting on error %v.", err)
break
@@ -107,9 +96,3 @@ var channelDumpCmd = cobra.Command{
log.Printf("Got %d URLs in %s.", totalURLs, duration.String())
},
}

func init() {
channelDumpCmd.Flags().BoolVarP(&force, "force", "f", false, "Overwrite the output file if it already exists")
channelDumpCmd.Flags().Uint32Var(&offset, "page-offset", 1, "Start getting videos at this page. (A page is usually 30 videos)")
channelCmd.AddCommand(&channelDumpCmd)
}
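
Condensed sketch of the dump loop above after the switch to apijson; error handling and file setup are trimmed, and the empty-page stop condition plus the writer variable are assumptions, since that part of the hunk is not shown:

totalURLs := 0
for i := offset; true; i++ {
	channelURLs, err := apijson.GetChannelVideoURLs(channelID, uint(i))
	if err != nil {
		log.Printf("Aborting on error %v.", err)
		break
	}
	if len(channelURLs) == 0 { // assumed: an empty page means no more videos
		break
	}
	for _, u := range channelURLs {
		fmt.Fprintln(writer, u) // writer: bufio.Writer for the output file (assumed name)
	}
	totalURLs += len(channelURLs)
}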

+ 12
- 0
cmd/video.go

@@ -0,0 +1,12 @@
package cmd

import "github.com/spf13/cobra"

var Video = cobra.Command{
Use: "video",
Short: "Get information about a video",
}

func init() {
Video.AddCommand(&videoDetailCmd)
}

+ 14
- 0
cmd/videodetail.go

@@ -0,0 +1,14 @@
package cmd

import "github.com/spf13/cobra"

var videoDetailCmd = cobra.Command{
Use: "detail <video ID> [file]",
Short: "Get details about a video",
Run: func(cmd *cobra.Command, args []string) {

},
}

func init() {
}

+ 6
- 0
data/channel.go

@@ -0,0 +1,6 @@
package data

type Channel struct {
ID string `json:"id"`
Name string `json:"name"`
}

+ 18
- 11
main.go

@@ -1,6 +1,5 @@
/* youtube-ma for MongoDB
*
* Based on https://github.com/CorentinB/youtube-ma */
// yt-mango: YT video metadata archiving utility
// Copyright (C) 2018 terorie

package main

@@ -8,6 +7,8 @@ import (
"github.com/spf13/cobra"
"fmt"
"os"
"github.com/terorie/yt-mango/cmd"
"log"
)

const Version = "v0.1 -- dev"
@@ -17,21 +18,27 @@ func printVersion(_ *cobra.Command, _ []string) {
}

func main() {
// All diagnostics (logging) should go to stderr
log.SetOutput(os.Stderr)

var printVersion bool
rootCmd := cobra.Command{
Use: "yt-mango",
Short: "YT-Mango is a scalable video metadata archiver",
Long: "YT-Mango is a scalable video metadata archiving utility\n" +
"written by terorie for https://the-eye.eu/",
PreRun: func(cmd *cobra.Command, args []string) {
if printVersion {
fmt.Println(Version)
os.Exit(0)
}
},
}
rootCmd.Flags().BoolVar(&printVersion, "version", false,
fmt.Sprintf("Print the version (" + Version +") and exit"), )

versionCmd := cobra.Command{
Use: "version",
Short: "Get the version number of yt-mango",
Run: printVersion,
}

rootCmd.AddCommand(&versionCmd)
rootCmd.AddCommand(&channelCmd)
rootCmd.AddCommand(&cmd.Channel)
rootCmd.AddCommand(&cmd.Video)

if err := rootCmd.Execute(); err != nil {
fmt.Fprintln(os.Stderr, err)
