Browse Source

Fix offset bug

Refactor channel ID parsing
master
terorie 4 years ago
parent
commit
6031763c67
  1. 49
      api/ids.go
  2. 3
      cmd/channel.go
  3. 41
      cmd/channeldump.go

49
api/ids.go

@@ -0,0 +1,49 @@
package api
import (
	"fmt"
	"log"
	"net/url"
	"os"
	"regexp"
	"strings"
)
// matchChannelID matches a bare channel ID: one or more word characters,
// hyphens, or URL-encoded "=" signs ("%3d" / "%3D").
var matchChannelID = regexp.MustCompile(`^([\w\-]|(%3[dD]))+$`)

// GetChannelID extracts a channel ID from chanURL.
//
// chanURL may be either a bare channel ID, which is returned unchanged, or a
// URL on youtube.com / www.youtube.com whose path starts with "/channel/",
// in which case the path segment following that prefix is returned.
//
// A non-nil error is returned (and the string result is empty) when chanURL
// cannot be parsed as a URL, points at another host, is an old-style
// "/user/" link, lacks the "/channel/" prefix, or carries no ID after it.
// The function never terminates the process; deciding how to react to a bad
// argument is left to the caller.
func GetChannelID(chanURL string) (string, error) {
	// Already a bare channel ID: nothing to extract.
	if matchChannelID.MatchString(chanURL) {
		return chanURL, nil
	}

	// Anything else must be a link to the youtube.com domain.
	parsed, err := url.Parse(chanURL)
	if err != nil {
		return "", fmt.Errorf("not a channel ID: %q: %v", chanURL, err)
	}
	if parsed.Host != "www.youtube.com" && parsed.Host != "youtube.com" {
		return "", fmt.Errorf("not a channel ID: %q", chanURL)
	}

	// Old /user/ links are rejected.
	// TODO Implement extraction of channel ID
	if strings.HasPrefix(parsed.Path, "/user/") {
		return "", fmt.Errorf("new /channel/ link is required, the old /user/ links do not work: %q", chanURL)
	}

	const channelPrefix = "/channel/"
	if !strings.HasPrefix(parsed.Path, channelPrefix) {
		return "", fmt.Errorf("not a channel ID: %q", chanURL)
	}
	channelID := strings.TrimPrefix(parsed.Path, channelPrefix)

	// Drop whatever follows the ID in the path (e.g. "/videos").
	if slash := strings.IndexByte(channelID, '/'); slash != -1 {
		channelID = channelID[:slash]
	}

	// "/channel/" with nothing behind it is not a usable ID.
	if channelID == "" {
		return "", fmt.Errorf("not a channel ID: %q", chanURL)
	}
	return channelID, nil
}

3
cmd/channel.go

@@ -2,7 +2,6 @@ package cmd
import (
"github.com/spf13/cobra"
"regexp"
)
var force bool
@@ -13,8 +12,6 @@ var Channel = cobra.Command{
Short: "Get information about a channel",
}
var matchChannelID = regexp.MustCompile("^([\\w\\-]|(%3[dD]))+$")
func init() {
channelDumpCmd.Flags().BoolVarP(&force, "force", "f", false, "Overwrite the output file if it already exists")
channelDumpCmd.Flags().UintVar(&offset, "page-offset", 1, "Start getting videos at this page. (A page is usually 30 videos)")

41
cmd/channeldump.go

@@ -2,9 +2,7 @@ package cmd
import (
"github.com/spf13/cobra"
"net/url"
"os"
"strings"
"time"
"bufio"
"log"
@@ -39,35 +37,10 @@ var channelDumpCmd = cobra.Command{
}
channelDumpContext.printResults = printResults
if !matchChannelID.MatchString(channelID) {
// Check if youtube.com domain
_url, err := url.Parse(channelID)
if err != nil || (_url.Host != "www.youtube.com" && _url.Host != "youtube.com") {
log.Fatal("Not a channel ID:", channelID)
os.Exit(1)
}
// Check if old /user/ URL
if strings.HasPrefix(_url.Path, "/user/") {
// TODO Implement extraction of channel ID
log.Fatal("New /channel/ link is required!\n" +
"The old /user/ links do not work.")
os.Exit(1)
}
// Remove /channel/ path
channelID = strings.TrimPrefix(_url.Path, "/channel/")
if len(channelID) == len(_url.Path) {
// No such prefix to be removed
log.Fatal("Not a channel ID:", channelID)
os.Exit(1)
}
// Remove rest of path from channel ID
slashIndex := strings.IndexRune(channelID, '/')
if slashIndex != -1 {
channelID = channelID[:slashIndex]
}
channelID, err := api.GetChannelID(channelID)
if err != nil {
log.Print(err)
os.Exit(1)
}
log.Printf("Starting work on channel ID \"%s\".", channelID)
@@ -115,13 +88,11 @@ var channelDumpCmd = cobra.Command{
page++
}
terminate:
log.Printf("&")
// Requests sent, wait for remaining requests to finish
for {
done := atomic.LoadUint64(&channelDumpContext.pagesDone)
// Page starts at 1
target := uint64(page) - 1
done := uint64(offset) + atomic.LoadUint64(&channelDumpContext.pagesDone)
target := uint64(page)
if done >= target { break }
// TODO use semaphore

Loading…
Cancel
Save