You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

channeldump.go 4.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. package cmd
  2. import (
  3. "github.com/spf13/cobra"
  4. "os"
  5. "time"
  6. "bufio"
  7. "log"
  8. "github.com/terorie/yt-mango/api"
  9. "fmt"
  10. "github.com/terorie/yt-mango/net"
  11. "sync/atomic"
  12. "errors"
  13. "sync"
  14. )
  15. var offset uint
  16. func init() {
  17. channelDumpCmd.Flags().UintVar(&offset, "page-offset", 1, "Start getting videos at this page. (A page is usually 30 videos)")
  18. }
  // channelDumpContext is the state shared between the goroutine that sends
  // page requests and the goroutine that processes the responses.
  var channelDumpContext struct {
  	// startTime is taken when work on the channel starts; it is used to
  	// report the total duration.
  	startTime time.Time

  	// printResults selects the output: URLs are printed to standard output
  	// when true and written to "writer" otherwise.
  	printResults bool

  	// writer is the buffered writer on the output file. It is only set
  	// when printResults is false.
  	writer *bufio.Writer

  	// pagesToReceive counts the pages that have been requested
  	// but whose result has not been processed yet.
  	pagesToReceive sync.WaitGroup

  	// errorOccurred is non-zero once an error was received.
  	// It is read and written through sync/atomic.
  	errorOccurred int32
  }
  32. // The channel dump route lists
  33. var channelDumpCmd = cobra.Command{
  34. Use: "dumpurls <channel ID> [file]",
  35. Short: "Get all public video URLs from channel",
  36. Long: "Write all videos URLs of a channel to a file",
  37. Args: cobra.RangeArgs(1, 2),
  38. Run: doChannelDump,
  39. }
  40. func doChannelDump(_ *cobra.Command, args []string) {
  41. if offset == 0 { offset = 1 }
  42. printResults := false
  43. fileName := ""
  44. channelID := args[0]
  45. if len(args) != 2 {
  46. printResults = true
  47. } else {
  48. fileName = args[1]
  49. }
  50. channelDumpContext.printResults = printResults
  51. channelID = api.GetChannelID(channelID)
  52. if channelID == "" { os.Exit(1) }
  53. log.Printf("Starting work on channel ID \"%s\".", channelID)
  54. channelDumpContext.startTime = time.Now()
  55. var flags int
  56. if force {
  57. flags = os.O_WRONLY | os.O_CREATE | os.O_TRUNC
  58. } else {
  59. flags = os.O_WRONLY | os.O_CREATE | os.O_EXCL
  60. }
  61. var file *os.File
  62. if !printResults {
  63. var err error
  64. file, err = os.OpenFile(fileName, flags, 0640)
  65. if err != nil {
  66. log.Fatal(err)
  67. os.Exit(1)
  68. }
  69. defer file.Close()
  70. writer := bufio.NewWriter(file)
  71. defer writer.Flush()
  72. channelDumpContext.writer = writer
  73. }
  74. results := make(chan net.JobResult)
  75. terminateSub := make(chan bool)
  76. // TODO Clean up
  77. go channelDumpResults(results, terminateSub)
  78. page := offset
  79. for {
  80. // Terminate if error detected
  81. if atomic.LoadInt32(&channelDumpContext.errorOccurred) != 0 {
  82. goto terminate
  83. }
  84. // Send new requests
  85. req := api.Main.GrabChannelPage(channelID, page)
  86. channelDumpContext.pagesToReceive.Add(1)
  87. net.DoAsyncHTTP(req, results, page)
  88. page++
  89. }
  90. terminate:
  91. // Requests sent, wait for remaining requests to finish
  92. channelDumpContext.pagesToReceive.Wait()
  93. terminateSub <- true
  94. }
  95. // Helper goroutine that processes HTTP results.
  96. // HTTP results are received on "results".
  97. // The routine exits if a value on "terminateSub" is received.
  98. // For every incoming result (error or response),
  99. // the "pagesToReceive" counter is decreased.
  100. // If an error is received, the "errorOccurred" flag is set.
  101. func channelDumpResults(results chan net.JobResult, terminateSub chan bool) {
  102. totalURLs := 0
  103. for {
  104. select {
  105. case <-terminateSub:
  106. duration := time.Since(channelDumpContext.startTime)
  107. log.Printf("Got %d URLs in %s.", totalURLs, duration.String())
  108. os.Exit(0)
  109. return
  110. case res := <-results:
  111. page, numURLs, err := channelDumpResult(&res)
  112. // Mark page as processed
  113. channelDumpContext.pagesToReceive.Done()
  114. // Report back error
  115. if err != nil {
  116. atomic.StoreInt32(&channelDumpContext.errorOccurred, 1)
  117. log.Printf("Error at page %d: %v", page, err)
  118. } else {
  119. totalURLs += numURLs
  120. }
  121. }
  122. }
  123. }
  124. // Processes a HTTP result
  125. func channelDumpResult(res *net.JobResult) (page uint, numURLs int, err error) {
  126. var channelURLs []string
  127. // Extra data is page number
  128. page = res.ReqData.(uint)
  129. // Abort if request failed
  130. if res.Err != nil { return page, 0, res.Err }
  131. // Parse response
  132. channelURLs, err = api.Main.ParseChannelVideoURLs(res.Res)
  133. if err != nil { return }
  134. numURLs = len(channelURLs)
  135. if numURLs == 0 { return page, 0, errors.New("returned no videos") }
  136. // Print results
  137. log.Printf("Received page %d: %d videos.", page, numURLs)
  138. if channelDumpContext.printResults {
  139. for _, _url := range channelURLs {
  140. fmt.Println(_url)
  141. }
  142. } else {
  143. for _, _url := range channelURLs {
  144. _, err := channelDumpContext.writer.WriteString(_url + "\n")
  145. if err != nil { panic(err) }
  146. }
  147. }
  148. return
  149. }