Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

channeldump.go 4.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. package cmd
  2. import (
  3. "github.com/spf13/cobra"
  4. "os"
  5. "time"
  6. "bufio"
  7. "log"
  8. "github.com/terorie/yt-mango/api"
  9. "fmt"
  10. "github.com/terorie/yt-mango/net"
  11. "sync/atomic"
  12. "errors"
  13. "sync"
  14. )
  15. var offset uint
  16. func init() {
  17. channelDumpCmd.Flags().UintVar(&offset, "page-offset", 1, "Start getting videos at this page. (A page is usually 30 videos)")
  18. }
  // channelDumpContext is the state shared between the goroutine that
  // sends page requests and the goroutine that processes the results.
  // It starts out as the zero value and is filled in by doChannelDump.
  var channelDumpContext struct {
  	// startTime is when the dump began; used to report the total duration.
  	startTime time.Time

  	// printResults selects the output: true prints URLs to stdout,
  	// false writes them through writer.
  	printResults bool

  	// writer is the buffered output file writer.
  	// It is only set when printResults is false.
  	writer *bufio.Writer

  	// pagesToReceive counts pages that have been requested but whose
  	// result has not been processed yet: incremented before each
  	// request is dispatched, decremented once per processed result.
  	pagesToReceive sync.WaitGroup

  	// errorOccurred is non-zero once any page result carried an error.
  	// It is read and written with the sync/atomic functions.
  	errorOccurred int32
  }
  32. // The channel dump route lists
  33. var channelDumpCmd = cobra.Command{
  34. Use: "dumpurls <channel ID> [file]",
  35. Short: "Get all public video URLs from channel",
  36. Long: "Write all videos URLs of a channel to a file",
  37. Args: cobra.RangeArgs(1, 2),
  38. Run: doChannelDump,
  39. }
  40. func doChannelDump(_ *cobra.Command, args []string) {
  41. if offset == 0 { offset = 1 }
  42. printResults := false
  43. fileName := ""
  44. channelID := args[0]
  45. if len(args) != 2 {
  46. printResults = true
  47. } else {
  48. fileName = args[1]
  49. }
  50. channelDumpContext.printResults = printResults
  51. channelID, err := api.GetChannelID(channelID)
  52. if err != nil {
  53. log.Print(err)
  54. os.Exit(1)
  55. }
  56. log.Printf("Starting work on channel ID \"%s\".", channelID)
  57. channelDumpContext.startTime = time.Now()
  58. var flags int
  59. if force {
  60. flags = os.O_WRONLY | os.O_CREATE | os.O_TRUNC
  61. } else {
  62. flags = os.O_WRONLY | os.O_CREATE | os.O_EXCL
  63. }
  64. var file *os.File
  65. if !printResults {
  66. var err error
  67. file, err = os.OpenFile(fileName, flags, 0640)
  68. if err != nil {
  69. log.Fatal(err)
  70. os.Exit(1)
  71. }
  72. defer file.Close()
  73. writer := bufio.NewWriter(file)
  74. defer writer.Flush()
  75. channelDumpContext.writer = writer
  76. }
  77. results := make(chan net.JobResult)
  78. terminateSub := make(chan bool)
  79. // TODO Clean up
  80. go channelDumpResults(results, terminateSub)
  81. page := offset
  82. for {
  83. // Terminate if error detected
  84. if atomic.LoadInt32(&channelDumpContext.errorOccurred) != 0 {
  85. goto terminate
  86. }
  87. // Send new requests
  88. req := api.Main.GrabChannelPage(channelID, page)
  89. channelDumpContext.pagesToReceive.Add(1)
  90. net.DoAsyncHTTP(req, results, page)
  91. page++
  92. }
  93. terminate:
  94. // Requests sent, wait for remaining requests to finish
  95. channelDumpContext.pagesToReceive.Wait()
  96. terminateSub <- true
  97. }
  98. // Helper goroutine that processes HTTP results.
  99. // HTTP results are received on "results".
  100. // The routine exits if a value on "terminateSub" is received.
  101. // For every incoming result (error or response),
  102. // the "pagesToReceive" counter is decreased.
  103. // If an error is received, the "errorOccurred" flag is set.
  104. func channelDumpResults(results chan net.JobResult, terminateSub chan bool) {
  105. totalURLs := 0
  106. for {
  107. select {
  108. case <-terminateSub:
  109. duration := time.Since(channelDumpContext.startTime)
  110. log.Printf("Got %d URLs in %s.", totalURLs, duration.String())
  111. os.Exit(0)
  112. return
  113. case res := <-results:
  114. page, numURLs, err := channelDumpResult(&res)
  115. // Mark page as processed
  116. channelDumpContext.pagesToReceive.Done()
  117. // Report back error
  118. if err != nil {
  119. atomic.StoreInt32(&channelDumpContext.errorOccurred, 1)
  120. log.Printf("Error at page %d: %v", page, err)
  121. } else {
  122. totalURLs += numURLs
  123. }
  124. }
  125. }
  126. }
  127. // Processes a HTTP result
  128. func channelDumpResult(res *net.JobResult) (page uint, numURLs int, err error) {
  129. var channelURLs []string
  130. // Extra data is page number
  131. page = res.ReqData.(uint)
  132. // Abort if request failed
  133. if res.Err != nil { return page, 0, res.Err }
  134. // Parse response
  135. channelURLs, err = api.Main.ParseChannelVideoURLs(res.Res)
  136. if err != nil { return }
  137. numURLs = len(channelURLs)
  138. if numURLs == 0 { return page, 0, errors.New("returned no videos") }
  139. // Print results
  140. log.Printf("Received page %d: %d videos.", page, numURLs)
  141. if channelDumpContext.printResults {
  142. for _, _url := range channelURLs {
  143. fmt.Println(_url)
  144. }
  145. } else {
  146. for _, _url := range channelURLs {
  147. _, err := channelDumpContext.writer.WriteString(_url + "\n")
  148. if err != nil { panic(err) }
  149. }
  150. }
  151. return
  152. }