A股上市公司传智教育(股票代码 003032)旗下技术交流社区北京昌平校区

 找回密码
 加入黑马

QQ登录

只需一步,快速开始

用Java中URLConnection抓取某个URL网页源码(这是原理核心)生成html文件,就是这么简单!就是这么Easy!

需要commons.io和commons.lang两个包
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
/**

*/
public class HTMLGenerator {
public static final String generate(final String url) {
if (StringUtils.isBlank(url)) {
return null;
}
Pattern pattern = Pattern.compile("(http://|https://){1}[\\w\\.\\-/:]+");
Matcher matcher = pattern.matcher(url);
if (!matcher.find()) {
return null;
}
StringBuffer sb = new StringBuffer();
try {
URL _url = new URL(url);
URLConnection urlConnection = _url.openConnection();
BufferedReader in = new BufferedReader(new InputStreamReader(urlConnection.getInputStream()));
String inputLine;
while ((inputLine = in.readLine()) != null) {
sb.append(inputLine);
}
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return sb.toString();
}
/**
* Test Code
* Target :
*/
public static void main(String[] args) throws IOException {
String src = HTMLGenerator.generate("http://www.google.cn/");
File file = new File("C:" + File.separator + "index.html");
FileUtils.writeStringToFile(file, src, "UTF-8");
}
}

1 个回复

倒序浏览
奈斯
回复 使用道具 举报
您需要登录后才可以回帖 登录 | 加入黑马