Browse Source

Refactor API selection

master
terorie 4 years ago
parent
commit
c3b6047f98
  1. 6
      .gitignore
  2. 23
      api/api.go
  3. 2
      apiclassic/data.go
  4. 12
      apiclassic/get.go
  5. 2
      apiclassic/grab.go
  6. 2
      apiclassic/parse.go
  7. 2
      apiclassic/parsedescription.go
  8. 2
      apiclassic/util.go
  9. 25
      apijson/get.go
  10. 72
      apijson/grab.go
  11. 16
      apijson/parsechannel.go
  12. 36
      apijson/parsevideo.go
  13. 4
      apijson/token.go
  14. 9
      browseajax/get.go
  15. 45
      browseajax/grab.go
  16. 22
      cmd/channel.go
  17. 27
      cmd/channeldump.go
  18. 12
      cmd/video.go
  19. 14
      cmd/videodetail.go
  20. 6
      data/channel.go
  21. 29
      main.go

6
.gitignore vendored

@ -1,5 +1,9 @@ @@ -1,5 +1,9 @@
# IntelliJ
/idea/
/.idea/
# Apple
.DS_Store
# Executables
/yt-mango
/yt-mango.exe

23
api/api.go

@ -0,0 +1,23 @@ @@ -0,0 +1,23 @@
package api
import (
"github.com/terorie/yt-mango/data"
"github.com/terorie/yt-mango/classic"
"github.com/terorie/yt-mango/apiclassic"
)
// API groups the entry points of one backend implementation as function
// values, so that different implementations can be stored in values of the
// same type.
type API struct {
// GetVideo receives a *data.Video and reports failure through its error result.
GetVideo func(*data.Video) error
// GetChannel receives a *data.Channel and reports failure through its error result.
GetChannel func(*data.Channel) error
// GetChannelVideoURLs receives a channel ID and a page number and returns a
// list of strings (presumably the video URLs found on that page) or an error.
GetChannelVideoURLs func(channelID string, page uint) ([]string, error)
}
// ClassicAPI is the API value whose three functions are all provided by the
// apiclassic package.
var ClassicAPI = API{
GetVideo: apiclassic.GetVideo,
GetChannel: apiclassic.GetChannel,
GetChannelVideoURLs: apiclassic.GetChannelVideoURLs,
}
// JsonAPI is declared as a variable of an empty anonymous struct type; it is
// NOT a value of type API and carries no functions.
// NOTE(review): presumably a placeholder that should become an API value
// backed by the JSON implementation — confirm and fill in.
var JsonAPI struct {
}

2
classic/data.go → apiclassic/data.go

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
package classic
package apiclassic
type XMLSubTrackList struct {
Tracks []struct {

12
classic/get.go → apiclassic/get.go

@ -1,11 +1,11 @@ @@ -1,11 +1,11 @@
package classic
package apiclassic
import (
"github.com/terorie/yt-mango/data"
"errors"
)
func Get(v *data.Video) error {
func GetVideo(v *data.Video) error {
if len(v.ID) == 0 { return errors.New("no video ID") }
// Download the doc tree
@ -19,3 +19,11 @@ func Get(v *data.Video) error { @@ -19,3 +19,11 @@ func Get(v *data.Video) error {
return nil
}
// GetChannel is a stub in the classic backend: it never touches c and always
// fails with a "not implemented" error.
func GetChannel(c *data.Channel) error {
	notImplemented := errors.New("not implemented")
	return notImplemented
}
// GetChannelVideoURLs is a stub in the classic backend: whatever channelID and
// page are passed, it returns a nil slice and a "not implemented" error.
func GetChannelVideoURLs(channelID string, page uint) ([]string, error) {
	var noURLs []string
	return noURLs, errors.New("not implemented")
}

2
classic/grab.go → apiclassic/grab.go

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
package classic
package apiclassic
import (
"net/http"

2
classic/parse.go → apiclassic/parse.go

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
package classic
package apiclassic
import (
"github.com/PuerkitoBio/goquery"

2
classic/parsedescription.go → apiclassic/parsedescription.go

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
package classic
package apiclassic
import (
"errors"

2
classic/util.go → apiclassic/util.go

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
package classic
package apiclassic
import (
"errors"

25
apijson/get.go

@ -0,0 +1,25 @@ @@ -0,0 +1,25 @@
package apijson
import (
"github.com/terorie/yt-mango/data"
"errors"
)
// GetVideo fills v using the JSON backend: it fetches the JSON document for v
// with GrabVideo and hands it to ParseVideo. The first error from either step
// is returned; nil means both steps succeeded.
func GetVideo(v *data.Video) (err error) {
	root, err := GrabVideo(v)
	if err != nil {
		return err
	}
	return ParseVideo(v, root)
}
// GetChannel is a stub in the JSON backend: it never touches c and always
// fails with a "not implemented" error.
func GetChannel(c *data.Channel) error {
	notImplemented := errors.New("not implemented")
	return notImplemented
}
// GetChannelVideoURLs downloads one page of a channel's listing with
// GrabChannelPage and extracts the links from it with ParseChannelPageLinks.
// A download error is returned with a nil slice; otherwise the parser's
// results are returned unchanged.
func GetChannelVideoURLs(channelID string, page uint) (urls []string, err error) {
	root, grabErr := GrabChannelPage(channelID, page)
	if grabErr != nil {
		return nil, grabErr
	}
	return ParseChannelPageLinks(root)
}

72
apijson/grab.go

@ -0,0 +1,72 @@ @@ -0,0 +1,72 @@
package apijson
import (
"github.com/terorie/yt-mango/data"
"net/http"
"github.com/terorie/yt-mango/common"
"github.com/valyala/fastjson"
"io/ioutil"
"errors"
)
const videoURL = "https://www.youtube.com/watch?pbj=1&v="
const channelURL = "https://www.youtube.com/browse_ajax?ctoken="
// GrabVideo requests videoURL+v.ID through common.Client and parses the
// response body as JSON.
//
// It returns the parsed root value, or a nil value and an error when building
// the request, sending it, reading the body or parsing the JSON fails. Any
// status other than 200 is reported as "HTTP failure", matching
// GrabChannelPage.
func GrabVideo(v *data.Video) (root *fastjson.Value, err error) {
	// Prepare request
	req, err := http.NewRequest("GET", videoURL+v.ID, nil)
	if err != nil {
		return nil, err
	}
	setHeaders(&req.Header)

	// Send request
	res, err := common.Client.Do(req)
	if err != nil {
		return nil, err
	}
	// The body must be closed on every path, otherwise the underlying
	// connection is leaked and cannot be reused.
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, errors.New("HTTP failure")
	}

	// Download response
	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return nil, err
	}

	// Parse JSON
	var p fastjson.Parser
	return p.ParseBytes(body)
}
// GrabChannelPage downloads one page of a channel listing and parses it as
// JSON.
//
// The page URL is channelURL followed by the token produced by
// GenChannelPageToken(channelID, page). The parsed root value is returned on
// success. Any status other than 200 yields an "HTTP failure" error; for a 500
// response the body is additionally printed with println to aid debugging.
func GrabChannelPage(channelID string, page uint) (root *fastjson.Value, err error) {
	// Generate page URL
	token := GenChannelPageToken(channelID, uint64(page))
	url := channelURL + token

	// Prepare request
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	setHeaders(&req.Header)

	// Send request
	res, err := common.Client.Do(req)
	if err != nil {
		return nil, err
	}
	// Close the body on every path. Previously responses that were neither
	// 200 nor 500 returned before any Close was registered and leaked.
	defer res.Body.Close()

	if res.StatusCode == http.StatusInternalServerError {
		// Best-effort debug dump: a read error here is irrelevant because
		// the request is reported as failed right below anyway.
		dump, _ := ioutil.ReadAll(res.Body)
		println(string(dump))
	}
	if res.StatusCode != http.StatusOK {
		return nil, errors.New("HTTP failure")
	}

	// Download response
	buf, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return nil, err
	}

	// Parse JSON
	var p fastjson.Parser
	return p.ParseBytes(buf)
}
// setHeaders appends the fixed request headers used by this package to h.
// Values are added, not replaced, so existing entries for the same keys are
// kept.
//
// NOTE(review): net/http documents that the Host of an outgoing client request
// comes from Request.Host / URL.Host, so the "Host" entry added here likely
// has no effect on the wire — confirm.
func setHeaders(h *http.Header) {
	fixed := [...][2]string{
		{"Host", "www.youtube.com"},
		{"X-YouTube-Client-Name", "1"},
		{"X-YouTube-Client-Version", "2.20170707"},
	}
	for _, kv := range fixed {
		h.Add(kv[0], kv[1])
	}
}

16
browseajax/parse.go → apijson/parsechannel.go

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
package browseajax
package apijson
import (
"github.com/valyala/fastjson"
@ -9,7 +9,7 @@ import ( @@ -9,7 +9,7 @@ import (
var MissingData = errors.New("missing data")
var ServerError = errors.New("server error")
func ParsePage(rootObj *fastjson.Value) ([]string, error) {
func ParseChannelPageLinks(rootObj *fastjson.Value) ([]string, error) {
// Root as array
root, err := rootObj.Array()
if err != nil { return nil, err }
@ -22,8 +22,7 @@ func ParsePage(rootObj *fastjson.Value) ([]string, error) { @@ -22,8 +22,7 @@ func ParsePage(rootObj *fastjson.Value) ([]string, error) {
break
}
}
if container == nil { return nil, MissingData
}
if container == nil { return nil, MissingData }
// Get error obj
errorExists := container.Exists(
@ -32,8 +31,7 @@ func ParsePage(rootObj *fastjson.Value) ([]string, error) { @@ -32,8 +31,7 @@ func ParsePage(rootObj *fastjson.Value) ([]string, error) {
"errors",
"error",
)
if errorExists { return nil, ServerError
}
if errorExists { return nil, ServerError }
// Get items from grid
itemsObj := container.Get(
@ -42,8 +40,7 @@ func ParsePage(rootObj *fastjson.Value) ([]string, error) { @@ -42,8 +40,7 @@ func ParsePage(rootObj *fastjson.Value) ([]string, error) {
"gridContinuation",
"items",
)
if itemsObj == nil { return nil, MissingData
}
if itemsObj == nil { return nil, MissingData }
// Items as array
items, err := itemsObj.Array()
@ -61,8 +58,7 @@ func ParsePage(rootObj *fastjson.Value) ([]string, error) { @@ -61,8 +58,7 @@ func ParsePage(rootObj *fastjson.Value) ([]string, error) {
"webCommandMetadata",
"url",
)
if urlObj == nil { return nil, MissingData
}
if urlObj == nil { return nil, MissingData }
// URL as string
urlBytes, err := urlObj.StringBytes()

36
apijson/parsevideo.go

@ -0,0 +1,36 @@ @@ -0,0 +1,36 @@
package apijson
import (
"github.com/valyala/fastjson"
"github.com/terorie/yt-mango/data"
"errors"
)
var missingData = errors.New("missing data")
var unexpectedType = errors.New("unexpected type")
// ParseVideo extracts video metadata from root into v.
//
// root must be a JSON array; the first element that contains
// page.playerResponse.videoDetails is used. Every string entry of its
// "keywords" array is appended to v.Tags, and v.Title is set when a "title"
// string is present.
//
// It returns unexpectedType when root is not an array and missingData when no
// element carries videoDetails. A missing "keywords" array is not an error: it
// previously aborted parsing before the title was read (presumably affecting
// videos without tags), so it is now treated as "no tags".
func ParseVideo(v *data.Video, root *fastjson.Value) error {
	rootArray := root.GetArray()
	if rootArray == nil {
		return unexpectedType
	}

	var videoDetails *fastjson.Value
	for _, sub := range rootArray {
		videoDetails = sub.Get("page", "playerResponse", "videoDetails")
		if videoDetails != nil {
			break
		}
	}
	// Fail explicitly instead of relying on nil-receiver behaviour below.
	if videoDetails == nil {
		return missingData
	}

	// Ranging over a nil slice is a no-op, so absent keywords add no tags.
	for _, keywordValue := range videoDetails.GetArray("keywords") {
		keywordBytes, err := keywordValue.StringBytes()
		if err != nil || keywordBytes == nil {
			// Skip entries that are not usable strings.
			continue
		}
		v.Tags = append(v.Tags, string(keywordBytes))
	}

	if titleBytes := videoDetails.GetStringBytes("title"); titleBytes != nil {
		v.Title = string(titleBytes)
	}
	return nil
}

4
browseajax/token.go → apijson/token.go

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
package browseajax
package apijson
import (
"bytes"
@ -6,7 +6,7 @@ import ( @@ -6,7 +6,7 @@ import (
"encoding/base64"
)
func GenerateToken(channelId string, page uint64) string {
func GenChannelPageToken(channelId string, page uint64) string {
// Generate the inner token
token := genInnerToken(page)

9
browseajax/get.go

@ -1,9 +0,0 @@ @@ -1,9 +0,0 @@
package browseajax
// GetPage downloads one channel page with GrabPage and extracts its URLs with
// ParsePage. If either step fails, a nil slice and that step's error are
// returned.
func GetPage(channelID string, page uint) ([]string, error) {
	root, err := GrabPage(channelID, page)
	if err == nil {
		var urls []string
		if urls, err = ParsePage(root); err == nil {
			return urls, nil
		}
	}
	return nil, err
}

45
browseajax/grab.go

@ -1,45 +0,0 @@ @@ -1,45 +0,0 @@
package browseajax
import (
"net/http"
"github.com/terorie/yt-mango/common"
"errors"
"io/ioutil"
"github.com/valyala/fastjson"
)
const mainURL = "https://www.youtube.com/browse_ajax?ctoken="
// GrabPage downloads one page of a channel listing and parses it as JSON.
//
// The page URL is mainURL followed by the token produced by
// GenerateToken(channelID, page). The parsed root value is returned on
// success. Any status other than 200 yields an "HTTP failure" error; for a 500
// response the body is additionally printed with println to aid debugging.
func GrabPage(channelID string, page uint) (*fastjson.Value, error) {
	// Generate page URL
	token := GenerateToken(channelID, uint64(page))
	url := mainURL + token

	// Prepare request
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Add("X-YouTube-Client-Name", "1")
	req.Header.Add("X-YouTube-Client-Version", "2.20180726")

	// Send request
	res, err := common.Client.Do(req)
	if err != nil {
		return nil, err
	}
	// Close the body on every path. Previously responses that were neither
	// 200 nor 500 returned before any Close was registered and leaked.
	defer res.Body.Close()

	if res.StatusCode == http.StatusInternalServerError {
		// Best-effort debug dump: a read error here is irrelevant because
		// the request is reported as failed right below anyway.
		dump, _ := ioutil.ReadAll(res.Body)
		println(string(dump))
	}
	if res.StatusCode != http.StatusOK {
		return nil, errors.New("HTTP failure")
	}

	// Download response
	buf, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return nil, err
	}

	// Parse JSON
	var p fastjson.Parser
	root, err := p.ParseBytes(buf)
	if err != nil {
		return nil, err
	}
	return root, nil
}

22
cmd/channel.go

@ -0,0 +1,22 @@ @@ -0,0 +1,22 @@
package cmd
import (
"github.com/spf13/cobra"
"regexp"
)
// force is the target of the --force / -f flag registered on channelDumpCmd
// in init (default false).
var force bool
// offset is the target of the --page-offset flag registered on channelDumpCmd
// in init (default 1).
var offset uint32
// Channel is the exported "channel" command. It defines no Run function of
// its own; init attaches channelDumpCmd to it as a subcommand.
var Channel = cobra.Command{
Use: "channel",
Short: "Get information about a channel",
}
// matchChannelID matches a non-empty string made up only of word characters,
// hyphens, and the sequences "%3d" / "%3D" (a URL-encoded '=').
var matchChannelID = regexp.MustCompile("^([\\w\\-]|(%3[dD]))+$")
// init registers the --force/-f and --page-offset flags on channelDumpCmd and
// attaches channelDumpCmd to the Channel command.
func init() {
	dumpFlags := channelDumpCmd.Flags()
	dumpFlags.BoolVarP(&force, "force", "f", false, "Overwrite the output file if it already exists")
	dumpFlags.Uint32Var(&offset, "page-offset", 1, "Start getting videos at this page. (A page is usually 30 videos)")

	Channel.AddCommand(&channelDumpCmd)
}

27
channel.go → cmd/channeldump.go

@ -1,28 +1,17 @@ @@ -1,28 +1,17 @@
package main
package cmd
import (
"github.com/spf13/cobra"
"github.com/terorie/yt-mango/browseajax"
"regexp"
"net/url"
"fmt"
"os"
"net/url"
"strings"
"log"
"time"
"bufio"
"log"
"github.com/terorie/yt-mango/apijson"
)
var force bool
var offset uint32
var channelCmd = cobra.Command{
Use: "channel",
Short: "Get information about a channel",
}
var matchChannelID = regexp.MustCompile("^([\\w\\-]|(%3[dD]))+$")
var channelDumpCmd = cobra.Command{
Use: "dumpurls <channel ID> <file>",
Short: "Get all public video URLs from channel",
@ -85,7 +74,7 @@ var channelDumpCmd = cobra.Command{ @@ -85,7 +74,7 @@ var channelDumpCmd = cobra.Command{
totalURLs := 0
for i := offset; true; i++ {
channelURLs, err := browseajax.GetPage(channelID, uint(i))
channelURLs, err := apijson.GetChannelVideoURLs(channelID, uint(i))
if err != nil {
log.Printf("Aborting on error %v.", err)
break
@ -107,9 +96,3 @@ var channelDumpCmd = cobra.Command{ @@ -107,9 +96,3 @@ var channelDumpCmd = cobra.Command{
log.Printf("Got %d URLs in %s.", totalURLs, duration.String())
},
}
func init() {
channelDumpCmd.Flags().BoolVarP(&force, "force", "f", false, "Overwrite the output file if it already exists")
channelDumpCmd.Flags().Uint32Var(&offset, "page-offset", 1, "Start getting videos at this page. (A page is usually 30 videos)")
channelCmd.AddCommand(&channelDumpCmd)
}

12
cmd/video.go

@ -0,0 +1,12 @@ @@ -0,0 +1,12 @@
package cmd
import "github.com/spf13/cobra"
// Video is the exported "video" command. It defines no Run function of its
// own; init attaches videoDetailCmd to it as a subcommand.
var Video = cobra.Command{
Use: "video",
Short: "Get information about a video",
}
// init attaches videoDetailCmd to the Video command.
func init() {
Video.AddCommand(&videoDetailCmd)
}

14
cmd/videodetail.go

@ -0,0 +1,14 @@ @@ -0,0 +1,14 @@
package cmd
import "github.com/spf13/cobra"
// videoDetailCmd is the "detail <video ID> [file]" command. Its Run function
// is currently empty, so invoking the command does nothing.
var videoDetailCmd = cobra.Command{
Use: "detail <video ID> [file]",
Short: "Get details about a video",
Run: func(cmd *cobra.Command, args []string) {
},
}
// init is empty.
// NOTE(review): presumably reserved for registering flags on videoDetailCmd —
// confirm, or remove it.
func init() {
}

6
data/channel.go

@ -0,0 +1,6 @@ @@ -0,0 +1,6 @@
package data
// Channel holds the data stored about a channel. Both fields are serialized
// to JSON under the lowercase keys given in their struct tags.
type Channel struct {
// ID is serialized as "id".
ID string `json:"id"`
// Name is serialized as "name".
Name string `json:"name"`
}

29
main.go

@ -1,6 +1,5 @@ @@ -1,6 +1,5 @@
/* youtube-ma for MongoDB
*
* Based on https://github.com/CorentinB/youtube-ma */
// yt-mango: YT video metadata archiving utility
// Copyright (C) 2018 terorie
package main
@ -8,6 +7,8 @@ import ( @@ -8,6 +7,8 @@ import (
"github.com/spf13/cobra"
"fmt"
"os"
"github.com/terorie/yt-mango/cmd"
"log"
)
const Version = "v0.1 -- dev"
@ -17,21 +18,27 @@ func printVersion(_ *cobra.Command, _ []string) { @@ -17,21 +18,27 @@ func printVersion(_ *cobra.Command, _ []string) {
}
func main() {
// All diagnostics (logging) should go to stderr
log.SetOutput(os.Stderr)
var printVersion bool
rootCmd := cobra.Command{
Use: "yt-mango",
Short: "YT-Mango is a scalable video metadata archiver",
Long: "YT-Mango is a scalable video metadata archiving utility\n" +
"written by terorie for https://the-eye.eu/",
PreRun: func(cmd *cobra.Command, args []string) {
if printVersion {
fmt.Println(Version)
os.Exit(0)
}
},
}
rootCmd.Flags().BoolVar(&printVersion, "version", false,
fmt.Sprintf("Print the version (" + Version +") and exit"), )
versionCmd := cobra.Command{
Use: "version",
Short: "Get the version number of yt-mango",
Run: printVersion,
}
rootCmd.AddCommand(&versionCmd)
rootCmd.AddCommand(&channelCmd)
rootCmd.AddCommand(&cmd.Channel)
rootCmd.AddCommand(&cmd.Video)
if err := rootCmd.Execute(); err != nil {
fmt.Fprintln(os.Stderr, err)

Loading…
Cancel
Save