|
|
@@ -8,148 +8,171 @@ import ( |
|
|
|
"log" |
|
|
|
"github.com/terorie/yt-mango/api" |
|
|
|
"fmt" |
|
|
|
"github.com/terorie/yt-mango/common" |
|
|
|
"github.com/terorie/yt-mango/net" |
|
|
|
"sync/atomic" |
|
|
|
"errors" |
|
|
|
"sync" |
|
|
|
) |
|
|
|
|
|
|
|
var channelDumpContext = struct{ |
|
|
|
var offset uint |
|
|
|
|
|
|
|
func init() { |
|
|
|
channelDumpCmd.Flags().UintVar(&offset, "page-offset", 1, "Start getting videos at this page. (A page is usually 30 videos)") |
|
|
|
} |
|
|
|
|
|
|
|
// The shared context of the request and response threads |
|
|
|
var channelDumpContext = struct { |
|
|
|
startTime time.Time |
|
|
|
printResults bool |
|
|
|
writer *bufio.Writer |
|
|
|
pagesDone uint64 |
|
|
|
errorOccured int32 // Use atomic boolean here |
|
|
|
// Number of pages that have been |
|
|
|
// requested but not yet received. |
|
|
|
// An additional +1 is added if additional pages
// are planned to be requested.
|
|
|
pagesToReceive sync.WaitGroup |
|
|
|
// If set to non-zero, an error was received |
|
|
|
errorOccurred int32 |
|
|
|
}{} |
|
|
|
|
|
|
|
// The channel dump route lists all public video URLs of a channel.
|
|
|
var channelDumpCmd = cobra.Command{ |
|
|
|
Use: "dumpurls <channel ID> [file]", |
|
|
|
Short: "Get all public video URLs from channel", |
|
|
|
Long: "Write all videos URLs of a channel to a file", |
|
|
|
Args: cobra.RangeArgs(1, 2), |
|
|
|
Run: func(cmd *cobra.Command, args []string) { |
|
|
|
printResults := false |
|
|
|
fileName := "" |
|
|
|
channelID := args[0] |
|
|
|
if len(args) != 2 { |
|
|
|
printResults = true |
|
|
|
} else { |
|
|
|
fileName = args[1] |
|
|
|
} |
|
|
|
channelDumpContext.printResults = printResults |
|
|
|
Run: doChannelDump, |
|
|
|
} |
|
|
|
|
|
|
|
channelID, err := api.GetChannelID(channelID) |
|
|
|
if err != nil { |
|
|
|
log.Print(err) |
|
|
|
os.Exit(1) |
|
|
|
} |
|
|
|
func doChannelDump(_ *cobra.Command, args []string) { |
|
|
|
if offset == 0 { offset = 1 } |
|
|
|
|
|
|
|
log.Printf("Starting work on channel ID \"%s\".", channelID) |
|
|
|
channelDumpContext.startTime = time.Now() |
|
|
|
printResults := false |
|
|
|
fileName := "" |
|
|
|
channelID := args[0] |
|
|
|
if len(args) != 2 { |
|
|
|
printResults = true |
|
|
|
} else { |
|
|
|
fileName = args[1] |
|
|
|
} |
|
|
|
channelDumpContext.printResults = printResults |
|
|
|
|
|
|
|
var flags int |
|
|
|
if force { |
|
|
|
flags = os.O_WRONLY | os.O_CREATE | os.O_TRUNC |
|
|
|
} else { |
|
|
|
flags = os.O_WRONLY | os.O_CREATE | os.O_EXCL |
|
|
|
} |
|
|
|
channelID, err := api.GetChannelID(channelID) |
|
|
|
if err != nil { |
|
|
|
log.Print(err) |
|
|
|
os.Exit(1) |
|
|
|
} |
|
|
|
|
|
|
|
var file *os.File |
|
|
|
log.Printf("Starting work on channel ID \"%s\".", channelID) |
|
|
|
channelDumpContext.startTime = time.Now() |
|
|
|
|
|
|
|
if !printResults { |
|
|
|
var err error |
|
|
|
file, err = os.OpenFile(fileName, flags, 0640) |
|
|
|
if err != nil { |
|
|
|
log.Fatal(err) |
|
|
|
os.Exit(1) |
|
|
|
} |
|
|
|
defer file.Close() |
|
|
|
var flags int |
|
|
|
if force { |
|
|
|
flags = os.O_WRONLY | os.O_CREATE | os.O_TRUNC |
|
|
|
} else { |
|
|
|
flags = os.O_WRONLY | os.O_CREATE | os.O_EXCL |
|
|
|
} |
|
|
|
|
|
|
|
var file *os.File |
|
|
|
|
|
|
|
writer := bufio.NewWriter(file) |
|
|
|
defer writer.Flush() |
|
|
|
channelDumpContext.writer = writer |
|
|
|
if !printResults { |
|
|
|
var err error |
|
|
|
file, err = os.OpenFile(fileName, flags, 0640) |
|
|
|
if err != nil { |
|
|
|
log.Fatal(err) |
|
|
|
os.Exit(1) |
|
|
|
} |
|
|
|
defer file.Close() |
|
|
|
|
|
|
|
results := make(chan common.JobResult) |
|
|
|
terminateSub := make(chan bool) |
|
|
|
writer := bufio.NewWriter(file) |
|
|
|
defer writer.Flush() |
|
|
|
channelDumpContext.writer = writer |
|
|
|
} |
|
|
|
|
|
|
|
// TODO Clean up |
|
|
|
go processResults(results, terminateSub) |
|
|
|
results := make(chan net.JobResult) |
|
|
|
terminateSub := make(chan bool) |
|
|
|
|
|
|
|
page := offset |
|
|
|
for { |
|
|
|
// Terminate if error detected |
|
|
|
if atomic.LoadInt32(&channelDumpContext.errorOccured) != 0 { |
|
|
|
goto terminate |
|
|
|
} |
|
|
|
// Send new requests |
|
|
|
req := api.Main.GrabChannelPage(channelID, page) |
|
|
|
common.DoAsyncHTTP(req, results, page) |
|
|
|
// TODO Clean up |
|
|
|
go channelDumpResults(results, terminateSub) |
|
|
|
|
|
|
|
page++ |
|
|
|
page := offset |
|
|
|
for { |
|
|
|
// Terminate if error detected |
|
|
|
if atomic.LoadInt32(&channelDumpContext.errorOccurred) != 0 { |
|
|
|
goto terminate |
|
|
|
} |
|
|
|
terminate: |
|
|
|
// Send new requests |
|
|
|
req := api.Main.GrabChannelPage(channelID, page) |
|
|
|
channelDumpContext.pagesToReceive.Add(1) |
|
|
|
net.DoAsyncHTTP(req, results, page) |
|
|
|
|
|
|
|
// Requests sent, wait for remaining requests to finish |
|
|
|
for { |
|
|
|
done := uint64(offset) + atomic.LoadUint64(&channelDumpContext.pagesDone) |
|
|
|
target := uint64(page) |
|
|
|
if done >= target { break } |
|
|
|
|
|
|
|
// TODO use semaphore |
|
|
|
time.Sleep(time.Millisecond) |
|
|
|
} |
|
|
|
page++ |
|
|
|
} |
|
|
|
terminate: |
|
|
|
|
|
|
|
// TODO Don't ignore pending results |
|
|
|
duration := time.Since(channelDumpContext.startTime) |
|
|
|
log.Printf("Done in %s.", duration.String()) |
|
|
|
// Requests sent, wait for remaining requests to finish |
|
|
|
channelDumpContext.pagesToReceive.Wait() |
|
|
|
|
|
|
|
terminateSub <- true |
|
|
|
}, |
|
|
|
terminateSub <- true |
|
|
|
} |
|
|
|
|
|
|
|
// TODO combine channels into one |
|
|
|
func processResults(results chan common.JobResult, terminateSub chan bool) { |
|
|
|
// Helper goroutine that processes HTTP results. |
|
|
|
// HTTP results are received on "results". |
|
|
|
// The routine exits if a value on "terminateSub" is received. |
|
|
|
// For every incoming result (error or response), |
|
|
|
// the "pagesToReceive" counter is decreased. |
|
|
|
// If an error is received, the "errorOccurred" flag is set. |
|
|
|
func channelDumpResults(results chan net.JobResult, terminateSub chan bool) { |
|
|
|
totalURLs := 0 |
|
|
|
for { |
|
|
|
select { |
|
|
|
case <-terminateSub: |
|
|
|
log.Printf("Got %d URLs", totalURLs) |
|
|
|
duration := time.Since(channelDumpContext.startTime) |
|
|
|
log.Printf("Got %d URLs in %s.", totalURLs, duration.String()) |
|
|
|
os.Exit(0) |
|
|
|
return |
|
|
|
case res := <-results: |
|
|
|
var err error |
|
|
|
var channelURLs []string |
|
|
|
page := res.ReqData.(uint) |
|
|
|
if res.Err != nil { |
|
|
|
err = res.Err |
|
|
|
goto endError |
|
|
|
} |
|
|
|
channelURLs, err = api.Main.ParseChannelVideoURLs(res.Res) |
|
|
|
if err != nil { goto endError } |
|
|
|
if len(channelURLs) == 0 { |
|
|
|
err = errors.New("returned no videos") |
|
|
|
goto endError |
|
|
|
} |
|
|
|
totalURLs += len(channelURLs) |
|
|
|
log.Printf("Received page %d: %d videos.", page, len(channelURLs)) |
|
|
|
|
|
|
|
if channelDumpContext.printResults { |
|
|
|
for _, _url := range channelURLs { |
|
|
|
fmt.Println(_url) |
|
|
|
} |
|
|
|
page, numURLs, err := channelDumpResult(&res) |
|
|
|
// Mark page as processed |
|
|
|
channelDumpContext.pagesToReceive.Done() |
|
|
|
// Report back error |
|
|
|
if err != nil { |
|
|
|
atomic.StoreInt32(&channelDumpContext.errorOccurred, 1) |
|
|
|
log.Printf("Error at page %d: %v", page, err) |
|
|
|
} else { |
|
|
|
for _, _url := range channelURLs { |
|
|
|
_, err := channelDumpContext.writer.WriteString(_url + "\n") |
|
|
|
if err != nil { panic(err) } |
|
|
|
} |
|
|
|
totalURLs += numURLs |
|
|
|
} |
|
|
|
// Increment done pages count |
|
|
|
atomic.AddUint64(&channelDumpContext.pagesDone, 1) |
|
|
|
continue |
|
|
|
endError: |
|
|
|
atomic.AddUint64(&channelDumpContext.pagesDone, 1) |
|
|
|
atomic.StoreInt32(&channelDumpContext.errorOccured, 1) |
|
|
|
log.Printf("Error at page %d: %v", page, err) |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// Processes a HTTP result |
|
|
|
func channelDumpResult(res *net.JobResult) (page uint, numURLs int, err error) { |
|
|
|
var channelURLs []string |
|
|
|
|
|
|
|
// Extra data is page number |
|
|
|
page = res.ReqData.(uint) |
|
|
|
// Abort if request failed |
|
|
|
if res.Err != nil { return page, 0, res.Err } |
|
|
|
|
|
|
|
// Parse response |
|
|
|
channelURLs, err = api.Main.ParseChannelVideoURLs(res.Res) |
|
|
|
if err != nil { return } |
|
|
|
numURLs = len(channelURLs) |
|
|
|
if numURLs == 0 { return page, 0, errors.New("returned no videos") } |
|
|
|
|
|
|
|
// Print results |
|
|
|
log.Printf("Received page %d: %d videos.", page, numURLs) |
|
|
|
|
|
|
|
if channelDumpContext.printResults { |
|
|
|
for _, _url := range channelURLs { |
|
|
|
fmt.Println(_url) |
|
|
|
} |
|
|
|
} else { |
|
|
|
for _, _url := range channelURLs { |
|
|
|
_, err := channelDumpContext.writer.WriteString(_url + "\n") |
|
|
|
if err != nil { panic(err) } |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return |
|
|
|
} |