Jsoup HelloWorld实现
我们用Jsoup来提取博客园(http://www.cnblogs.com/)首页的网页标题(title)和口号。

这里我们要用到前面讲的HttpClient来获取网页内容:
pom.xml:
<!-- 添加HttpClient支持 -->
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.2</version>
</dependency>
<!-- 添加Jsoup支持 -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>
实例代码:
package com.open1111.jsoup;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class Demo01 {
/**
 * Downloads the page at http://www.cnblogs.com/ with HttpClient, parses it
 * with Jsoup, and prints the text of the first {@code <title>} element and of
 * the element whose id is {@code site_nav_top} (labelled as the slogan).
 *
 * <p>If either element is missing from the downloaded page, a notice is
 * written to standard error instead of failing with an exception.
 *
 * @param args command-line arguments; not used
 * @throws Exception if the HTTP request or reading the response body fails
 */
public static void main(String[] args) throws Exception {
    String webContent;
    // try-with-resources closes the response first and then the client,
    // even when executing the request or reading the body throws.
    try (CloseableHttpClient httpclient = HttpClients.createDefault();
         CloseableHttpResponse response = httpclient.execute(new HttpGet("http://www.cnblogs.com/"))) {
        HttpEntity entity = response.getEntity();
        // Decode the response body as UTF-8.
        webContent = EntityUtils.toString(entity, "utf-8");
    }

    Document doc = Jsoup.parse(webContent);

    // getElementsByTag may return an empty list, so check before indexing.
    Elements titleElements = doc.getElementsByTag("title");
    if (titleElements.isEmpty()) {
        System.err.println("未找到title标签");
    } else {
        String title = titleElements.get(0).text();
        System.out.println("网页标题是:" + title);
    }

    // getElementById returns null when no element carries the given id.
    Element navTopElement = doc.getElementById("site_nav_top");
    if (navTopElement == null) {
        System.err.println("未找到id为site_nav_top的元素");
    } else {
        String navTop = navTopElement.text();
        System.out.println("口号:" + navTop);
    }
}
}
运行输出:
网页标题是:博客园 - 开发者的网上家园
口号:代码改变世界
具体的代码讲解,请参考《一头扎进Jsoup》视频教程,其中会有详细说明。
上一篇:Jsoup简介