javascript - Looping over urls to do the same thing -


I am trying to scrape a few sites. Here is the code:

// Scrape every URL in `urls`, append a "url ; price" line per page to
// `output`, then write the result to test.txt.
// Assumes `urls`, `page`, `fs`, `waitfor` and `output` are set up earlier in
// the script (not shown in the post).
for (var i = 0; i < urls.length; i++) {
    url = urls[i];
    console.log("start scraping: " + url);

    // page.open only starts the request; its callback fires later.
    page.open(url, function () {
        waitfor(function () {
            // Ready once the progress wrapper has exactly one child node.
            return page.evaluate(function () {
                return document.getElementById("progresswrapper").childNodes.length == 1;
            });
        }, function () {
            var price = page.evaluate(function () {
                // (price extraction omitted in the original post)
                return price;
            });

            console.log(price);
            result = url + " ; " + price;
            output = output + "\r\n" + result;
        });
    });
}

// These two statements run as soon as the loop has finished iterating.
fs.write('test.txt', output);
phantom.exit();

I want to scrape all the sites in the array `urls`, extract some information, and write that information to a text file.

But there seems to be a problem with the loop. When scraping only one site without using a loop, everything works as I want. With the loop, at first nothing happens, and then the line

console.log("start scraping: " + url); 

is shown, but one time too many. If urls = {a, b, c}, then PhantomJS prints:

start scraping: a
start scraping: b
start scraping: c
start scraping:

It seems page.open isn't called at all. I am a newbie to JS, so sorry for this stupid question.

PhantomJS is asynchronous. By calling page.open() multiple times using a loop, you essentially rush the execution of the callback. You're overwriting the current request before it is finished with a new request, which is then again overwritten. You need to execute them one after the other, for example like this:

// Sequential scraping written out by hand: each page.open is started only
// from the completion callback of the previous page's waitfor, so requests
// never overlap. (In practice each nested call would use the next URL.)
page.open(url, function () {
    waitfor(function () {
        // readiness test for the first page
    }, function () {
        page.open(url, function () {
            waitfor(function () {
                // readiness test for the second page
            }, function () {
                // and so on
            });
        });
    });
});

But this is tedious. There are utilities that can help you with writing nicer code, such as async.js. You can install it in the directory of the PhantomJS script through npm.

var async = require("async"); // install async through npm

// Build one task per URL. Each task opens its page and calls `callback`
// only after waitfor's ready-handler has run, which tells async.js that the
// next task may start.
var tests = urls.map(function (url) {
    return function (callback) {
        page.open(url, function () {
            waitfor(function () {
                // readiness test
            }, function () {
                callback();
            });
        });
    };
});

// Run the tasks one after another; write the output and exit when all are done.
async.series(tests, function finish() {
    fs.write('test.txt', output);
    phantom.exit();
});

If you don't want any dependencies, then it is also easy to define your own recursive function (from here):

var urls = [/*....*/];

// Open one URL, wait until the page is ready, then move on to the next URL.
function handle_page(url) {
    page.open(url, function () {
        waitfor(function () {
            // readiness test
        }, function () {
            next_page();
        });
    });
}

// Take the next URL off the front of the queue and process it; exit once
// the queue is exhausted.
function next_page() {
    var url = urls.shift();
    // shift() returns undefined on an empty array. Test the shifted value,
    // not the array itself: an array object is always truthy.
    if (url === undefined) {
        phantom.exit(0);
        // Return explicitly so handle_page is never called with undefined.
        return;
    }
    handle_page(url);
}

next_page();

Comments