Browse Source

Channel url dump CLI

master
terorie 1 year ago
parent
commit
0429c2f1ce
4 changed files with 149 additions and 16 deletions
  1. 5
    5
      browseajax/get.go
  2. 27
    10
      browseajax/parse.go
  3. 115
    0
      channel.go
  4. 2
    1
      main.go

+ 5
- 5
browseajax/get.go View File

@@ -1,9 +1,9 @@
package browseajax

func GetPage(channelID string, page uint) error {
func GetPage(channelID string, page uint) ([]string, error) {
root, err := GrabPage(channelID, page)
if err != nil { return err }
err = ParsePage(root)
if err != nil { return err }
return nil
if err != nil { return nil, err }
urls, err := ParsePage(root)
if err != nil { return nil, err }
return urls, nil
}

+ 27
- 10
browseajax/parse.go View File

@@ -3,14 +3,16 @@ package browseajax
import (
"github.com/valyala/fastjson"
"errors"
"strings"
)

var missingData = errors.New("missing data")
var MissingData = errors.New("missing data")
var ServerError = errors.New("server error")

func ParsePage(rootObj *fastjson.Value) error {
func ParsePage(rootObj *fastjson.Value) ([]string, error) {
// Root as array
root, err := rootObj.Array()
if err != nil { return err }
if err != nil { return nil, err }

// Find response container
var container *fastjson.Value
@@ -20,9 +22,18 @@ func ParsePage(rootObj *fastjson.Value) error {
break
}
}
if container == nil { return missingData }
if container == nil { return nil, MissingData
}

// Get error obj
errorExists := container.Exists(
"response",
"responseContext",
"errors",
"error",
)
if errorExists { return nil, ServerError
}

// Get items from grid
itemsObj := container.Get(
@@ -31,11 +42,14 @@ func ParsePage(rootObj *fastjson.Value) error {
"gridContinuation",
"items",
)
if itemsObj == nil { return missingData }
if itemsObj == nil { return nil, MissingData
}

// Items as array
items, err := itemsObj.Array()
if err != nil { return err }
if err != nil { return nil, err }

urls := make([]string, 0)

// Enumerate
for _, item := range items {
@@ -47,14 +61,17 @@ func ParsePage(rootObj *fastjson.Value) error {
"webCommandMetadata",
"url",
)
if urlObj == nil { return missingData }
if urlObj == nil { return nil, MissingData
}

// URL as string
urlBytes, err := urlObj.StringBytes()
if err != nil { return err }
if err != nil { return nil, err }
url := string(urlBytes)

println(url)
if strings.HasPrefix(url, "/watch?v") {
urls = append(urls, "https://www.youtube.com" + url)
}
}
return nil
return urls, nil
}

+ 115
- 0
channel.go View File

@@ -0,0 +1,115 @@
package main

import (
"github.com/spf13/cobra"
"github.com/terorie/yt-mango/browseajax"
"regexp"
"fmt"
"os"
"net/url"
"strings"
"log"
"time"
"bufio"
)

// Command-line flag values for the "dumpurls" subcommand.
// Both are bound to their flags in init.
var (
	// force allows an existing output file to be overwritten.
	force bool
	// offset is the first page number requested from the channel.
	offset uint32
)

// channelCmd is the parent "channel" command; its subcommands are
// attached in init.
var channelCmd = cobra.Command{
	Short: "Get information about a channel",
	Use:   "channel",
}

// matchChannelID accepts a bare channel ID: one or more word characters,
// hyphens, or percent-encoded equals signs ("%3d" / "%3D").
var matchChannelID = regexp.MustCompile(`^([\w\-]|(%3[dD]))+$`)

// channelDumpCmd implements "dumpurls <channel ID> <file>".
//
// The first argument is either a bare channel ID or a youtube.com
// /channel/<ID> URL. Pages are requested through browseajax.GetPage,
// starting at the page-offset flag value, until a page fails or comes back
// empty. Every URL received is written to the output file, one per line.
//
// The process exits with status 1 when the channel argument is not usable,
// when the output file cannot be opened (without --force an existing file
// is an error), or when writing to the file fails.
var channelDumpCmd = cobra.Command{
	Use:   "dumpurls <channel ID> <file>",
	Short: "Get all public video URLs from channel",
	Long:  "Write all videos URLs of a channel to a file",
	Args:  cobra.ExactArgs(2),
	Run: func(cmd *cobra.Command, args []string) {
		channelID := args[0]
		fileName := args[1]

		if !matchChannelID.MatchString(channelID) {
			// Not a bare ID: it must be a youtube.com URL.
			parsed, err := url.Parse(channelID)
			if err != nil || (parsed.Host != "www.youtube.com" && parsed.Host != "youtube.com") {
				fmt.Fprintln(os.Stderr, "Not a channel ID:", channelID)
				os.Exit(1)
			}

			// Check if old /user/ URL
			if strings.HasPrefix(parsed.Path, "/user/") {
				// TODO Implement extraction of channel ID
				fmt.Fprintln(os.Stderr, "New /channel/ link is required!\n"+
					"The old /user/ links do not work.")
				os.Exit(1)
			}

			// Only /channel/<ID>[/...] paths carry a channel ID.
			if !strings.HasPrefix(parsed.Path, "/channel/") {
				// Report the user's original input, not the URL path.
				fmt.Fprintln(os.Stderr, "Not a channel ID:", args[0])
				os.Exit(1)
			}
			channelID = strings.TrimPrefix(parsed.Path, "/channel/")

			// Remove rest of path from channel ID
			if slashIndex := strings.IndexRune(channelID, '/'); slashIndex != -1 {
				channelID = channelID[:slashIndex]
			}

			// A path of just "/channel/" leaves nothing to work with.
			if channelID == "" {
				fmt.Fprintln(os.Stderr, "Not a channel ID:", args[0])
				os.Exit(1)
			}
		}

		log.Printf("Starting work on channel ID \"%s\".", channelID)
		startTime := time.Now()

		// Without --force, O_EXCL makes opening an existing file fail
		// instead of overwriting it.
		flags := os.O_WRONLY | os.O_CREATE
		if force {
			flags |= os.O_TRUNC
		} else {
			flags |= os.O_EXCL
		}

		// Check the open error before touching the file handle.
		file, err := os.OpenFile(fileName, flags, 0640)
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
		writer := bufio.NewWriter(file)

		totalURLs := 0
		var writeErr error
	pages:
		for i := offset; ; i++ {
			channelURLs, err := browseajax.GetPage(channelID, uint(i))
			if err != nil {
				log.Printf("Aborting on error %v.", err)
				break
			}
			if len(channelURLs) == 0 {
				log.Printf("Page %d returned no videos.", i)
				break
			}
			totalURLs += len(channelURLs)
			log.Printf("Received page %d: %d videos.", i, len(channelURLs))

			for _, videoURL := range channelURLs {
				if _, writeErr = writer.WriteString(videoURL + "\n"); writeErr != nil {
					break pages
				}
			}
		}

		// Flush and close explicitly rather than via defer: os.Exit skips
		// deferred calls, and a failed final flush or close would otherwise
		// lose buffered URLs without any report.
		if writeErr == nil {
			writeErr = writer.Flush()
		}
		if closeErr := file.Close(); writeErr == nil {
			writeErr = closeErr
		}
		if writeErr != nil {
			fmt.Fprintln(os.Stderr, writeErr)
			os.Exit(1)
		}

		duration := time.Since(startTime)
		log.Printf("Got %d URLs in %s.", totalURLs, duration.String())
	},
}

// init binds the --force/-f and --page-offset flags to the dumpurls
// command and attaches that command under "channel".
func init() {
	dumpFlags := channelDumpCmd.Flags()
	dumpFlags.BoolVarP(
		&force, "force", "f", false,
		"Overwrite the output file if it already exists",
	)
	dumpFlags.Uint32Var(
		&offset, "page-offset", 1,
		"Start getting videos at this page. (A page is usually 30 videos)",
	)

	channelCmd.AddCommand(&channelDumpCmd)
}

+ 2
- 1
main.go View File

@@ -21,7 +21,7 @@ func main() {
Use: "yt-mango",
Short: "YT-Mango is a scalable video metadata archiver",
Long: "YT-Mango is a scalable video metadata archiving utility\n" +
"written by terorie with help from the-eye.eu",
"written by terorie for https://the-eye.eu/",
}

versionCmd := cobra.Command{
@@ -31,6 +31,7 @@ func main() {
}

rootCmd.AddCommand(&versionCmd)
rootCmd.AddCommand(&channelCmd)

if err := rootCmd.Execute(); err != nil {
fmt.Fprintln(os.Stderr, err)

Loading…
Cancel
Save