Skip to content

Commit 55de64b

Browse files
author
lanzer
committed
Fix thresher not handling HTTP responses whose status code is not 200
A non-200 response is now reported using its status message string (see ContentMine/quickscrape#62)
1 parent 43e8c8f commit 55de64b

File tree

2 files changed

+6
-0
lines changed

2 files changed

+6
-0
lines changed

lib/renderer/basic.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ BasicRenderer.prototype.render = function(url, actions, cookiejar) {
1616
renderer.emit('renderer.urlRendered', url, body);
1717
} else if (error) {
1818
this.emit('error', error);
19+
} else if (response.statusCode != 200) {
20+
renderer.emit ('renderer.status', response.statusMessage);
1921
}
2022
});
2123
}

lib/scraper.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,10 @@ Scraper.prototype.scrapeUrl = function(theUrl, node) {
249249
var renderer = scraper.newRenderer();
250250
renderer.render(theUrl, scraper.actions);
251251
scraper.ticker.elongate();
252+
renderer.on('renderer.status', function(message) {
253+
scraper.emit('urlRendered',message);
254+
scraper.ticker.tick();
255+
});
252256
renderer.on('renderer.urlRendered', function(theUrl, html) {
253257
scraper.emit('urlRendered', theUrl);
254258
// the children of the root node have no dependencies, so we scrape

0 commit comments

Comments
 (0)