0

I'm trying to modify this script: https://github.com/ariya/phantomjs/blob/master/examples/render_multi_url.js so that instead of "rendermulti-1.png (and so on)" the output files will be named for the web pages they are caps of.

Here's what I tried.

    // Render Multiple URLs to file

var RenderUrlsToFile, arrayOfUrls, system;

system = require("system");

/*
Render given urls
@param array of URLs to render
@param callbackPerUrl Function called after finishing each URL, including the last URL
@param callbackFinal Function called after finishing everything 
*/
RenderUrlsToFile = function(urls, callbackPerUrl, callbackFinal) {
    var getFilename, next, page, retrieve, urlIndex, webpage, pagename ; //<--
    urlIndex = 0;
    webpage = require("webpage");
    page = null;
    // replace forward slashes with underscores          //<--
    // NOTE: this assignment runs exactly once, while urlIndex is still 0, and it
    // reads the outer arrayOfUrls variable rather than the urls parameter, so
    // pagename is fixed to the first URL for the whole run.
    pagename = arrayOfUrls[urlIndex].replace(/\//g,'_'); //<--
    // Builds the output file name. It only reads pagename, which is never
    // reassigned, so every call returns the same name.
    getFilename = function() {
    //  return "rendermulti-" + urlIndex + ".png";       //<--
        return pagename + ".png";                        //<--
    };
    // Closes the current page, reports the result for this URL through
    // callbackPerUrl, then continues with the next URL.
    next = function(status, url, file) {
        page.close();
        callbackPerUrl(status, url, file);
        return retrieve();
    };
    // Processes one URL per call: removes the first entry from urls (this
    // mutates the array that was passed in), opens it in a new page and, on
    // success, renders it to a file. Calls callbackFinal once urls is empty.
    retrieve = function() {
        var url;
        if (urls.length > 0) {
            url = urls.shift();
            urlIndex++;
            page = webpage.create();
            page.viewportSize = {
                width: 800,
                height: 600
            };
            page.settings.userAgent = "Phantom.js bot";
            return page.open("http://" + url, function(status) {
                var file;
                file = getFilename();
                if (status === "success") {
                    // Rendering is deferred by 200 ms after a successful load.
                    return window.setTimeout((function() {
                        page.render(file);
                        return next(status, url, file);
                    }), 200);
                } else {
                    // Nothing is rendered on failure, but file is still passed on.
                    return next(status, url, file);
                }
            });
        } else {
            return callbackFinal();
        }
    };
    return retrieve();
};

// Use every entry of system.args after the first one as the list of URLs.
// When there are none, print the usage line and fall back to a demo list.
arrayOfUrls = Array.prototype.slice.call(system.args, 1);

if (arrayOfUrls.length === 0) {
    console.log("Usage: phantomjs render_multi_url.js [domain.name1, domain.name2, ...]");
    arrayOfUrls = ["www.google.com", "www.bbc.co.uk", "www.phantomjs.org"];
}

// Render every URL, log the outcome of each one, and exit PhantomJS when done.
RenderUrlsToFile(arrayOfUrls, function(status, url, file) {
    var message;
    if (status === "success") {
        message = "Rendered '" + url + "' at '" + file + "'";
    } else {
        message = "Unable to render '" + url + "'";
    }
    return console.log(message);
}, function() {
    return phantom.exit();
});

The script runs, but names all files after the first supplied URL and ignores anything after the '/'.

I suspect I'm making some basic error, possibly something to do with scope, but when I move the new variable into the getFileName function things break.

Any help at all will be appreciated.

Thanks!

2 Answers 2

2

retrieve() calls urls.shift(), which removes entries from the array as pages are processed, so indexing into arrayOfUrls is not a reliable way to compute the filename.

You're right: '/' is an invalid character in a file name, so you have to replace it with a safe placeholder character (such as '_').

Also, because the callback passed to page.open already has url in scope, it's better to use that to compute the filename.

A solution could be :

// Render Multiple URLs to file

var RenderUrlsToFile, arrayOfUrls, system;

system = require("system");

/*
Render given urls
@param array of URLs to render
@param callbackPerUrl Function called after finishing each URL, including the last URL
@param callbackFinal Function called after finishing everything 
*/
RenderUrlsToFile = function(urls, callbackPerUrl, callbackFinal) {
    var getFilename, next, page, retrieve, urlIndex, webpage, pagename ; //<--
    urlIndex = 0;
    webpage = require("webpage");
    page = null;

    getFilename = function(url) {
        return url.replace(/\//g,'_')+  ".png";  //<--
    };
    next = function(status, url, file) {
        page.close();
        callbackPerUrl(status, url, file);
        return retrieve();
    };
    retrieve = function() {
        var url;
        if (urls.length > 0) {
            url = urls.shift();
            urlIndex++;
            page = webpage.create();
            page.viewportSize = {
                width: 800,
                height: 600
            };
            page.settings.userAgent = "Phantom.js bot";
            return page.open("http://" + url, function(status) {
                var file;
                file = getFilename(url);
                if (status === "success") {
                    return window.setTimeout((function() {
                        page.render(file);
                        return next(status, url, file);
                    }), 200);
                } else {
                    return next(status, url, file);
                }
            });
        } else {
            return callbackFinal();
        }
    };
    return retrieve();
};

// URLs come from system.args (all entries after the first). A null result
// means none were given: print the usage line and use a demo list instead.
arrayOfUrls = system.args.length > 1 ? Array.prototype.slice.call(system.args, 1) : null;

if (arrayOfUrls === null) {
    console.log("Usage: phantomjs render_multi_url.js [domain.name1, domain.name2, ...]");
    arrayOfUrls = ["www.google.com", "www.bbc.co.uk", "www.phantomjs.org"];
}

// Kick off rendering: log one line per URL and exit PhantomJS after the last one.
RenderUrlsToFile(arrayOfUrls, function(status, url, file) {
    return console.log(
        status !== "success"
            ? "Unable to render '" + url + "'"
            : "Rendered '" + url + "' at '" + file + "'"
    );
}, function() {
    return phantom.exit();
});
Sign up to request clarification or add additional context in comments.

1 Comment

Thank you! A perfect solution, and enough explanation to guide me to further study. Cheers!
0

yes, I think you could just modify this method

// Builds the output file name for a capture. Only one return can take effect,
// so the numbered naming is left commented out and the page-based name is used.
getFilename = function() {
    // return "rendermulti-" + urlIndex + ".png";    // numbered naming
    return pagename + ".png";
};

to customize your output file names.

PS. I'd like to ask whether anyone else has run into this issue: if I give this script 90 HTML files to render, most of the time it doesn't render them all, and I need to run it several times to finish all 90 files.

I tried splitting my HTML files into smaller arrays and calling this script for each one, but the script calls an exit method that ends my process, so I can't loop over all of these smaller URL arrays and finish them in a single run.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.