How To Scrape Javascript Injected Image Src And Alt With Phantom.js?
I'm using the following script to scrape images using phantom.js: var page = require('webpage').create(); url = 'https://www.everlane.com/collections/mens-luxury-tees/products/mens
Solution 1:
You need to evaluate the page only after it has finished loading. You can do this with the page.onLoadFinished
callback, which is invoked once the page has finished loading and the document is ready. Something like this should work:
// PhantomJS script: load a product page and print the `src` and `alt`
// attributes of every <img> element as JSON.
// Note: PhantomJS runs an ES5-era engine, so `var` and function expressions are used.
var page = require('webpage').create();
var url = 'https://www.everlane.com/collections/mens-luxury-tees/products/mens-crew-antique';

// Register the handler before calling page.open() so the load event can never
// be missed. `status` is the string 'success' or 'fail'.
page.onLoadFinished = function (status) {
  if (status !== 'success') {
    console.log('Failed to load ' + url + ' (status: ' + status + ')');
    phantom.exit(1);
    return;
  }

  // page.evaluate() runs its callback inside the sandboxed page context and can
  // only hand back JSON-serializable values. DOM nodes do not survive the trip,
  // so the attributes are read inside the page and plain objects are returned.
  // NOTE(review): images injected by scripts after the load event may need an
  // additional wait (e.g. setTimeout) before evaluating — confirm for this site.
  var srcAlt = page.evaluate(function () {
    var images = document.getElementsByTagName('img');
    var result = [];
    for (var i = 0; i < images.length; i++) {
      result.push({
        src: images[i].getAttribute('src'),
        alt: images[i].getAttribute('alt')
      });
    }
    return result;
  }) || [];

  // Stringify explicitly; logging the array directly prints "[object Object]".
  console.log(JSON.stringify(srcAlt, null, 2));
  phantom.exit();
};

page.open(url);
Post a Comment for "How To Scrape Javascript Injected Image Src And Alt With Phantom.js?"