java如何获取网页中的文字

如题所述

package test;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.Authenticator;
import java.net.HttpURLConnection;
import java.net.PasswordAuthentication;
import java.net.URL;
import java.net.URLConnection;
import java.util.Properties;

/**
 * Small demo of fetching the text of a web page with {@code java.net},
 * optionally through an authenticating HTTP proxy.
 */
public class URLTest {
    /** Line terminator appended after every line read from a response. */
    private static final String LINE_SEPARATOR = "\r\n";

    /**
     * Downloads the resource behind {@code strUrl} and returns it as text.
     *
     * <p>The body is decoded as UTF-8 and every line is terminated with CRLF.
     * This method never throws: any failure (malformed URL, I/O error, ...)
     * is reported through the return value.
     *
     * @param strUrl the URL to read, e.g. {@code "https://example.com"}
     * @return the content of the resource, or {@code "error open url:" + strUrl}
     *         if it could not be read
     */
    public static String getContent(String strUrl) {
        // try-with-resources guarantees the stream is released even when
        // reading fails half-way through.
        try (InputStream in = new URL(strUrl).openStream()) {
            return readAll(in);
        } catch (Exception e) {
            // Part of the public contract: callers get an error string, not an exception.
            return "error open url:" + strUrl;
        }
    }

    /**
     * Configures the JVM-wide HTTP/HTTPS proxy and the credentials used to
     * authenticate against it.
     *
     * <p>This mutates global state: it replaces the default
     * {@link Authenticator} and sets the proxy system properties.
     *
     * @param host     proxy host name or IP; a leading scheme such as
     *                 {@code "http://"} is tolerated and stripped
     * @param port     proxy port
     * @param username user name sent to the proxy
     * @param password password sent to the proxy
     */
    public static void initProxy(String host, int port, final String username,
            final String password) {
        Authenticator.setDefault(new Authenticator() {
            @Override
            protected PasswordAuthentication getPasswordAuthentication() {
                return new PasswordAuthentication(username, password.toCharArray());
            }
        });

        String proxyHost = stripScheme(host);
        String proxyPort = Integer.toString(port);
        System.setProperty("http.proxyHost", proxyHost);
        System.setProperty("http.proxyPort", proxyPort);
        // https:// URLs read their own pair of properties; without these an
        // HTTPS request would not go through the proxy.
        System.setProperty("https.proxyHost", proxyHost);
        System.setProperty("https.proxyPort", proxyPort);

        // Kept from the original for backward compatibility.
        // NOTE(review): these two keys are not among the documented java.net
        // proxy properties, so they presumably have no effect - confirm before removing.
        System.setProperty("http.proxyType", "4");
        System.setProperty("http.proxySet", "true");
    }

    /**
     * Demo entry point: configures a proxy, then prints the page once via an
     * explicit {@link HttpURLConnection} and once via {@link #getContent(String)}.
     *
     * @param args unused
     * @throws IOException if the explicit connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String url = "https://www.jb51.net";
        // Bare host only: the proxy host property must not carry a scheme.
        String proxy = "192.168.22.81";
        int port = 80;
        String username = "username";
        String password = "password";

        initProxy(proxy, port, username, password);

        HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
        try {
            connection.connect();
            String content;
            try (InputStream is = connection.getInputStream()) {
                content = readAll(is);
            }
            System.out.println("content= " + content);
        } finally {
            connection.disconnect();
        }

        System.out.println(getContent(url));
    }

    /** Reads {@code in} to the end as UTF-8 text, terminating each line with CRLF. */
    private static String readAll(InputStream in) throws IOException {
        StringBuilder text = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append(LINE_SEPARATOR);
            }
        }
        return text.toString();
    }

    /** Returns {@code host} without a leading {@code scheme://} prefix, if it has one. */
    private static String stripScheme(String host) {
        int schemeEnd = host.indexOf("://");
        return schemeEnd >= 0 ? host.substring(schemeEnd + 3) : host;
    }
}
温馨提示:答案为网友推荐,仅供参考
第1个回答  2016-05-21
Java 属于后台,网页属于前端。如果需要让 Java 获取到网页上的文字,这就是前后端交互。
可以通过超链接、Ajax 等手段向后台指定的接口传值,这样后台(也就是 Java)就能获取到了。
第2个回答  推荐于2018-05-06
可以使用jsoup进行网页html读取,然后遍历html获取对应的文字内容。本回答被网友采纳
第3个回答  2016-06-01
如果要获取表单的内容,
<form>
<input type="text" name="username" value=""/>
</form>
request.getParameter("username");

如果是获取网页内容,估计是要获取url,从头到尾爬了
第4个回答  2016-06-28
//获取页面上的文字用String就可以了呀
String name=request.getParameter("name");

相似回答