Browse Source

More detailed stats tracking (fetch queue length, save queue); fix a crash in the fail handling when a failed request has no response object

master
Aaron Miller 4 years ago
parent
commit
220320a5b5
3 changed files with 22 additions and 10 deletions
  1. 3
    1
      .gitignore
  2. 9
    6
      lib/fetch.js
  3. 10
    3
      spider.js

+ 3
- 1
.gitignore View File

@@ -1,3 +1,5 @@
node_modules/*
images/
.hg*
.hg*
*.tar.bz2
2015-*

+ 9
- 6
lib/fetch.js View File

@@ -20,6 +20,7 @@ var request = require('request');
var started = false;
var unresolved = {};

var lastUri = '(unstarted)';
var passes = 0;
var kbytes = 0;
var fails = [];
@@ -29,10 +30,10 @@ var stats = [];

// NB times in milliseconds
var options = {
loopSleep: 1000,
loopSleep: 15000,
verbose: false,
// Set this to 0 if you're an asshole.
fetchDelay: 200
fetchDelay: 250
};

var log = (options.verbose ? console.log : function () {});
@@ -68,6 +69,7 @@ function fetch(uri, action, extra) {
// Wait for options.fetchDelay milliseconds, then fire off the
// fetch.
Q.delay(options.fetchDelay).then(function() {
lastUri = uri;
request(
{
method: 'GET',
@@ -108,7 +110,7 @@ function fetch(uri, action, extra) {
// Handle a failed fetch (request gave back an error, or
// response code isn't 2xx).
console.error('failed ' + uri
+ ': status ' + response.statusCode
+ ': status ' + (response ? response.statusCode : 'NONE')
+ ', err ' + error);
// Stuff the fetch information into the fails list.
fails.push({
@@ -140,8 +142,9 @@ function fetch(uri, action, extra) {
*/
function watch() {
var defer = Q.defer();
var qlen = Object.keys(unresolved).length;

if (!started || Object.keys(unresolved).length > 0) {
if (!started || qlen > 0) {
// If the fetcher hasn't started yet (no fetches queued), or if
// any fetches are currently in flight, then just report stats,
// wait, and go around the loop again.
@@ -150,8 +153,8 @@ function watch() {
}).join('')
+ passes + ' gets, '
+ fails.length + ' fails, '
+ Math.floor(kbytes/1024) + ' M total'
);
+ Math.floor(kbytes/1024) + 'M, '
+ 'fetchq ' + qlen);

if (fails.length > 0) {
console.log('pushing a retry');

+ 10
- 3
spider.js View File

@@ -38,6 +38,7 @@ var basePath = './images/';

var collisions = 0;
var saved = 0;
var saveQ = 0;
var start = util.timestamp();

// Register some custom stats functions into the watcher's stats
@@ -46,12 +47,15 @@ var start = util.timestamp();
fetcher.registerStats(function statsRunTime() {
return (util.timestamp() - start) + ' seconds';
});
fetcher.registerStats(function statsColln () {
return collisions.toString(10) + ' coll';
});
fetcher.registerStats(function statsSaveQ () {
return saveQ.toString(10) + ' saveq';
});
fetcher.registerStats(function statsSaved () {
return saved.toString(10) + ' saved';
});
fetcher.registerStats(function statsColln () {
return collisions.toString(10) + ' name coll';
});

// Start the fetcher's monitoring loop; it will continue until all
// recursive downloads are complete.
@@ -197,6 +201,8 @@ function parseSheetPage(uri, response, body, sheetInfo) {

// ...and queue a download for the sheet image, with a suitable
// output filename so we know how to find it.
saveQ++;
fetcher.fetch(sheetUri, saveImage, {
path: [basePath,
util.makeSafePathPart(sheetInfo.nav),
@@ -253,5 +259,6 @@ function saveImage(uri, response, body, fileinfo) {
fs.writeFile(filePath, body, 'binary', function(err) {
if (err) throw err;
saved++;
saveQ--;
});
}

Loading…
Cancel
Save