- package regex;
- import java.io.BufferedReader;
- import java.io.BufferedWriter;
- import java.io.File;
- import java.io.InputStreamReader;
- import java.io.PrintWriter;
- import java.net.MalformedURLException;
- import java.net.URL;
- import java.net.URLConnection;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- // Small web crawler program
- public class Carwler {
- public static void main (String[] args) throws Exception{
- URL url = new URL("http://tieba.baidu.com/p/2711844440");
- URLConnection urlcon = url.openConnection();
- BufferedReader br = new BufferedReader(new InputStreamReader(urlcon.getInputStream()));
- PrintWriter pw = new PrintWriter(new File("E:\\carwler.txt"));
- String regex="\\w+@\\w+(\\.\\w+)+";
- Pattern p = Pattern.compile(regex);
- String line=null;
- while((line=br.readLine())!=null){
- Matcher mm = p.matcher(line);
- while(mm.find()){
- //System.out.println(mm.group()); // There is a problem here: the matches can be printed directly to the console, but why can't they be written to a file?
|
|