Untitled

mail@pastecode.io avatar
unknown
javascript
3 years ago
1.6 kB
3
Indexable
Never
/**
 * Collect the "objurl" attribute of every child element of a container.
 *
 * @param {Element} month_choose_elem - Container whose direct children each
 *     carry an "objurl" attribute.
 * @returns {Array} One entry per child, in document order; an entry is
 *     whatever getAttribute("objurl") returns for that child.
 */
function extract_file_url(month_choose_elem) {
    var url_count = month_choose_elem.childElementCount;
    var url_list = [];
    var url_elem;  // declared locally so it no longer leaks as an implicit global
    for (var i = 0; i < url_count; i++) {
        url_elem = month_choose_elem.children[i];
        url_list.push(url_elem.getAttribute("objurl"));
    }
    return url_list;
}

/**
 * Build a filename -> { url } map from a list of file URLs.
 *
 * Filenames are generated as "table_<index>.xlsx", where <index> is the
 * zero-based position of the URL in the input list.
 *
 * @param {Array} file_url_list - URLs to include, in order.
 * @param {*} [src_filename] - Currently unused; kept so existing callers
 *     that pass it keep working.
 * @returns {Object} Map of generated filename to an object with a "url" key.
 */
function build_fileinfo(file_url_list, src_filename) {
    // All working variables are declared locally; they used to be implicit
    // globals (and "fileinfo" clobbered the script-level variable of that name).
    var fileinfo = {};
    var filename_base = "table";
    var filename_suffix = "xlsx";
    var filename;
    for (var i = 0; i < file_url_list.length; i++) {
        filename = filename_base + "_" + String(i) + "." + filename_suffix;
        fileinfo[filename] = {};
        fileinfo[filename]["url"] = file_url_list[i];
    }
    return fileinfo;
}

// Script entry: set up the genie runner, derive the list of files to fetch
// from the page, and register them with the runner.
var genie = require('genie').create({ xpathVersion: '2.0' });
var x = require('casper').selectXPath;
var ex = genie.extendedXpath(x);
// Patterns passed to files.blacklist() below.
// NOTE(review): presumably requests matching these substrings are skipped — confirm against genie.
var blacklist = ['.css', '.png', '.gif', '.jpeg', 'google-analytics', 'yandex'];
var files = genie.files();

var base_url = "http://www.moa.gov.cn/ztzl/szcpxx/jdsj/";
// TODO: base_url is never opened before the XPath below is evaluated. Work out
// how to navigate to it first so that ex(...) can be resolved against the
// loaded page (original author's open question).
// NOTE(review): assumes ex(...)[0] yields a DOM element exposing
// childElementCount/children — confirm against the genie API.
var month_choose_elem = ex("(//li[@data-id='xzjd']//ul[@class='newChangeList'])[2]")[0];
// Pass the located element, not the URL string: a string has no
// childElementCount, so the previous call always produced an empty list.
var file_url_list = extract_file_url(month_choose_elem);
var fileinfo = build_fileinfo(file_url_list);

files.blacklist(blacklist)
    .retry(5)
    .add(fileinfo)
    .steps(function (parameters) {
        // Per-file steps are not implemented yet; the lines below are the
        // author's draft of the intended navigation.
        // this.thenOpen(parameters.url);
        // this.waitAndClick(ex("//a[contains(text(), '"+parameters.source_file_name+"')]"));
    });

genie.run(files);