捕獲和轉換Web的工具

使用Node.js從網站捕獲HTML表

Node.js API

有多種轉換HTML表的方法 into使用JSON,CSV和Excel電子表格 GrabzIt的Node.js API,這裡詳細介紹了一些最有用的技術。 但是,在開始之前,請記住 url_to_table, html_to_table or file_to_table 方法 save or save_to 必須調用方法來捕獲表。 如果您想快速查看此服務是否適合您,可以嘗試 捕獲HTML表的實時演示 從URL。

基本選項

此特定方法調用將轉換指定URL網頁中的第一個HTML表, intoa CSV文件。 此代碼段將轉換在指定網頁或HTML輸入中找到的第一個HTML表 intoa CSV文件。

client.url_to_table("https://www.tesla.com");
//Then call the save or save_to method
client.html_to_table("<html><body><table><tr><th>Name</th><th>Age</th></tr>
    <tr><td>Tom</td><td>23</td></tr><tr><td>Nicola</td><td>26</td></tr>
    </table></body></html>");
//Then call the save or save_to method
client.file_to_table("tables.html");
//Then call the save or save_to method

默認情況下,它將轉換它標識的第一個表 intoa表。 但是,可以通過將2傳遞給網頁中的第二個表來進行轉換 tableNumberToInclude 屬性。

var grabzit = require('grabzit');

var client = new grabzit("Sign in to view your Application Key", "Sign in to view your Application Secret");

var options = {"tableNumberToInclude":2};

client.url_to_table("https://www.tesla.com", options);
//Then call the save or save_to method
client.save_to("result.csv", function (error, id){
    //this callback is called once the capture is downloaded
    if (error != null){
        throw error;
    }
});
var grabzit = require('grabzit');

var client = new grabzit("Sign in to view your Application Key", "Sign in to view your Application Secret");

var options = {"tableNumberToInclude":2};

client.html_to_table("<html><body><table><tr><th>Name</th><th>Age</th></tr>
    <tr><td>Tom</td><td>23</td></tr><tr><td>Nicola</td><td>26</td></tr>
    </table></body></html>", options);
//Then call the save or save_to method
client.save_to("result.csv", function (error, id){
    //this callback is called once the capture is downloaded
    if (error != null){
        throw error;
    }
});
var grabzit = require('grabzit');

var client = new grabzit("Sign in to view your Application Key", "Sign in to view your Application Secret");

var options = {"tableNumberToInclude":2};

client.file_to_table("tables.html", options);
//Then call the save or save_to method
client.save_to("result.csv", function (error, id){
    //this callback is called once the capture is downloaded
    if (error != null){
        throw error;
    }
});

您還可以指定 targetElement 屬性,將確保僅轉換指定元素ID中的表。

var grabzit = require('grabzit');

var client = new grabzit("Sign in to view your Application Key", "Sign in to view your Application Secret");

var options = {"targetElement":"stocks_table"};

client.url_to_table("https://www.tesla.com", options);
//Then call the save or save_to method
client.save_to("result.csv", function (error, id){
    //this callback is called once the capture is downloaded
    if (error != null){
        throw error;
    }
});
var grabzit = require('grabzit');

var client = new grabzit("Sign in to view your Application Key", "Sign in to view your Application Secret");

var options = {"targetElement":"stocks_table"};

client.html_to_table("<html><body><table id='stocks_table'><tr><th>Name</th><th>Age</th></tr>
    <tr><td>Tom</td><td>23</td></tr><tr><td>Nicola</td><td>26</td></tr>
    </table></body></html>", options);
//Then call the save or save_to method
client.save_to("result.csv", function (error, id){
    //this callback is called once the capture is downloaded
    if (error != null){
        throw error;
    }
});
var grabzit = require('grabzit');

var client = new grabzit("Sign in to view your Application Key", "Sign in to view your Application Secret");

var options = {"targetElement":"stocks_table"};

client.file_to_table("tables.html", options);
//Then call the save or save_to method
client.save_to("result.csv", function (error, id){
    //this callback is called once the capture is downloaded
    if (error != null){
        throw error;
    }
});

另外,您也可以通過將true傳遞給 includeAllTables 屬性,但是僅適用於JSON和XLSX格式。 此選項會將每個表放在生成的電子表格工作簿中的新表中。

var grabzit = require('grabzit');

var client = new grabzit("Sign in to view your Application Key", "Sign in to view your Application Secret");

var options = {"format","xlsx","includeHeaderNames":true,"includeAllTables":true};

client.url_to_table("https://www.tesla.com", options);
//Then call the save or save_to method
client.save_to("result.xlsx", function (error, id){
    //this callback is called once the capture is downloaded
    if (error != null){
        throw error;
    }
});
var grabzit = require('grabzit');

var client = new grabzit("Sign in to view your Application Key", "Sign in to view your Application Secret");

var options = {"format","xlsx","includeHeaderNames":true,"includeAllTables":true};

client.html_to_table("<html><body><table><tr><th>Name</th><th>Age</th></tr>
    <tr><td>Tom</td><td>23</td></tr><tr><td>Nicola</td><td>26</td></tr>
    </table></body></html>", options);
//Then call the save or save_to method
client.save_to("result.xlsx", function (error, id){
    //this callback is called once the capture is downloaded
    if (error != null){
        throw error;
    }
});
var grabzit = require('grabzit');

var client = new grabzit("Sign in to view your Application Key", "Sign in to view your Application Secret");

var options = {"format","xlsx","includeHeaderNames":true,"includeAllTables":true};

client.file_to_table("tables.html", options);
//Then call the save or save_to method
client.save_to("result.xlsx", function (error, id){
    //this callback is called once the capture is downloaded
    if (error != null){
        throw error;
    }
});

將HTML表轉換為JSON

通過使用Node.js和GrabzIt,您可以轉換HTML表 into JSON,只需指定 json 在format參數中。 如下面的示例所示, save_to 方法完成後,在結果變量中使用JSON調用oncomplete函數,然後由內置Node.js對其進行解析 JSON.parse 函數創建代表HTML表的對象。

var grabzit = require('grabzit');

var client = new grabzit("Sign in to view your Application Key", "Sign in to view your Application Secret");

var options = {"format","json","includeHeaderNames":true,"includeAllTables":true};
client.url_to_table("https://www.tesla.com", options);

client.save_to(null, function(error, result){
    if (result != null)
    {
        var tableObj = JSON.parse(result);
    }
});

自訂識別碼

您可以將自定義標識符傳遞給 方法,如下所示,然後將此值返回到GrabzIt Node.js處理程序。 例如,該自定義標識符可以是數據庫標識符,從而允許將屏幕截圖與特定數據庫記錄相關聯。

var grabzit = require('grabzit');

var client = new grabzit("Sign in to view your Application Key", "Sign in to view your Application Secret");

var options = {"customId":123456};

client.url_to_table("https://www.tesla.com", options);
//Then call the save method
client.save("http://www.example.com/handler", function (error, id){
    if (error != null){
        throw error;
    }
});
var grabzit = require('grabzit');

var client = new grabzit("Sign in to view your Application Key", "Sign in to view your Application Secret");

var options = {"customId":123456};

client.html_to_table("<html><body><h1>Hello World!</h1></body></html>", options);
//Then call the save method
client.save("http://www.example.com/handler", function (error, id){
    if (error != null){
        throw error;
    }
});
var grabzit = require('grabzit');

var client = new grabzit("Sign in to view your Application Key", "Sign in to view your Application Secret");

var options = {"customId":123456};

client.file_to_table("example.html", options);
//Then call the save method
client.save("http://www.example.com/handler", function (error, id){
    if (error != null){
        throw error;
    }
});