import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads one web page and prints every e-mail address found in it.
 *
 * <p>Each match is written to standard output as {@code <n>:<address>}, where {@code n} is a
 * running counter that starts at 1 and spans the whole page.
 */
public class i {

    /** Address of the page that is scanned for e-mail addresses. */
    private static final String PAGE_URL = "http://tieba.baidu.com/p/3892898422";

    /**
     * E-mail matcher: 6 to 12 characters from {@code [a-zA-Z0-9_]}, an {@code @}, an alphanumeric
     * host label, then one to three dot-separated alphabetic suffixes. Compiled once and reused
     * for every line.
     */
    private static final Pattern MAIL_PATTERN =
            Pattern.compile("[a-zA-Z0-9_]{6,12}@[a-zA-Z0-9]+(\\.[a-zA-Z]+){1,3}");

    /**
     * Fetches {@link #PAGE_URL}, scans it line by line and prints the numbered matches.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the page cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        URLConnection con = new URL(PAGE_URL).openConnection();

        int count = 0;
        // try-with-resources closes the reader (and the connection's stream) even when
        // reading fails part-way through.
        // The charset is given explicitly so decoding does not depend on the platform default;
        // the pattern itself only matches ASCII characters.
        // NOTE(review): assumes the page is served in an ASCII-compatible encoding - confirm.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String mail : findEmails(line)) {
                    count++;
                    System.out.println(count + ":" + mail);
                }
            }
        }
    }

    /**
     * Returns every substring of {@code line} that matches {@link #MAIL_PATTERN}, in order of
     * appearance. Package-private so the matching logic can be exercised without network access.
     *
     * @param line text to scan; must not be {@code null}
     * @return the matches, possibly empty, never {@code null}
     */
    static List<String> findEmails(String line) {
        List<String> found = new ArrayList<>();
        Matcher matcher = MAIL_PATTERN.matcher(line);
        while (matcher.find()) {
            found.add(matcher.group());
        }
        return found;
    }
}
网页爬虫确实很好玩啊,大家做数据分析工作时应该会经常用到这样的代码吧?
|
|