有时候表格比较复杂,包含 caption、header、body、footer 等多个部分。
HtmlUnit 同样提供了相应的 API 支持:
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <title>复杂表格</title>
</head>
<body>
  <!-- Sample table with a caption, a header, a footer and two body sections -->
  <table id="table1">
    <caption>复杂表格</caption>
    <thead>
      <tr>
        <th>个数</th>
        <th>名称</th>
      </tr>
    </thead>
    <tfoot>
      <tr>
        <td>7</td>
        <td></td>
      </tr>
    </tfoot>
    <tbody>
      <tr>
        <td>5</td>
        <td>猪</td>
      </tr>
    </tbody>
    <tbody>
      <tr>
        <td>2</td>
        <td>牛</td>
      </tr>
    </tbody>
  </table>
</body>
</html>
这个表格比前面一个复杂点:
测试地址:http://www.java1234.com/crawler/table02.html
下面给出测试代码:
package com.open1111;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.List;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlTable;
import com.gargoylesoftware.htmlunit.html.HtmlTableBody;
import com.gargoylesoftware.htmlunit.html.HtmlTableFooter;
import com.gargoylesoftware.htmlunit.html.HtmlTableHeader;
import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
public class HtmlUnitTest6 {
/**
 * Fetches the sample page and prints the caption, the header rows, the rows of
 * every body section and the footer rows of the table with id {@code table1}.
 *
 * <p>HTTP and I/O failures are reported on standard error via
 * {@code printStackTrace()}; they are not rethrown.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // try-with-resources closes the client (releasing its memory) on every exit path.
    try (WebClient webClient = new WebClient(BrowserVersion.FIREFOX_52)) {
        webClient.getOptions().setCssEnabled(false); // disable CSS support
        webClient.getOptions().setJavaScriptEnabled(false); // disable JavaScript support

        // Fetch and parse the page.
        HtmlPage page = webClient.getPage("http://www.java1234.com/crawler/table02.html");
        HtmlTable table = page.getHtmlElementById("table1");

        String caption = table.getCaptionText(); // table caption
        System.out.println("表格标题:" + caption);

        // The header is absent (null) when the table has no <thead>.
        HtmlTableHeader header = table.getHeader();
        System.out.println("头信息:");
        if (header != null) {
            printRows(header.getRows());
        }

        // A table may have several <tbody> sections; print the rows of each in order.
        for (HtmlTableBody body : table.getBodies()) {
            printRows(body.getRows());
        }

        // The footer is absent (null) when the table has no <tfoot>.
        HtmlTableFooter footer = table.getFooter();
        System.out.println("根信息:");
        if (footer != null) {
            printRows(footer.getRows());
        }
    } catch (FailingHttpStatusCodeException | IOException e) {
        // IOException also covers MalformedURLException (its subclass).
        e.printStackTrace();
    }
}

/** Prints the text of each given table row on its own line. */
private static void printRows(List<HtmlTableRow> rows) {
    for (HtmlTableRow row : rows) {
        System.out.println(row.asText());
    }
}
}
运行输出:
表格标题:复杂表格
头信息:
个数 名称
5 猪
2 牛
根信息:
7