黑马程序员技术交流社区
标题:
新手学习正则,麻烦帮我看一下这个问题
[打印本页]
作者:
范德农
时间:
2012-12-24 14:02
标题:
新手学习正则,麻烦帮我看一下这个问题
/** * <p>Title: </p> * * <p>Description: </p> * * <p>Copyright: Copyright (c) 2012</p> * * <p>Company: </p> * * @author not attributable * @version 1.0 */import java.io.*; import java.net.*; import java.util.*; import java.util.regex.*; import java.util.zip.*; import com.tcsos.util.Regexer; public class test { //返回所有组的正则 public static ArrayDeque<String[]> regexAllGroups(String original, String regex) { int total = 0; String[] ary = null; ArrayDeque Q = new ArrayDeque(); if (original == null || regex == null) { return Q; } Pattern p = Pattern.compile(regex, Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(original); while (m != null && m.find()) { //该正则在这里m.find卡死。。 total = m.groupCount(); if (total < 1) { continue; } ary = new String[total]; for (int i = 1; i <= total; i++) { ary[i - 1] = new String(m.group(i)); } Q.add(ary); } m = null; p = null; return Q; } //获取网页源码 public static String getUrlHtml(String strURL) { String body = null; String contentEncoding = null; URL _URL = null; InputStream IN = null; HttpURLConnection CONNECTION = null; try { _URL = new URL(strURL); CONNECTION = (HttpURLConnection) _URL.openConnection(); CONNECTION.setConnectTimeout(3000); CONNECTION.setReadTimeout(3000); CONNECTION.setRequestProperty("Accept-Encoding", "gzip,deflate"); CONNECTION.setRequestProperty("Accept", "*/*"); CONNECTION.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)"); CONNECTION.setRequestProperty("Connection", "close"); CONNECTION.setRequestMethod("GET"); CONNECTION.setFollowRedirects(true); CONNECTION.setUseCaches(false); CONNECTION.setInstanceFollowRedirects(true); /*判断是不是GZIP/DEFLATE压缩格式的网页*/ int type = 0; contentEncoding = CONNECTION.getContentEncoding(); if (contentEncoding != null) { contentEncoding = contentEncoding.toLowerCase(); if (contentEncoding.indexOf("gzip") != -1) { type = 1; } if (contentEncoding.indexOf("deflate") != -1) { type = 2; } } switch (type) { case 1: IN = new 
GZIPInputStream(CONNECTION.getInputStream()); break; case 2: IN = new InflaterInputStream(CONNECTION.getInputStream()); break; default: IN = CONNECTION.getInputStream(); break; } byte[] b = null; if (IN != null && (b = inputStreamToByte(IN)) != null) { body = new String(b, "utf-8"); IN.close(); } CONNECTION.disconnect(); b = null; } catch (Exception e) { try { if (IN != null) { IN.close(); } if (CONNECTION != null) { CONNECTION.disconnect(); } } catch (Exception ex) { } body = null; } IN = null; _URL = null; CONNECTION = null; return body; } public static byte[] inputStreamToByte(InputStream in) { if (in == null) { return null; } int ch; byte[] b = null; ByteArrayOutputStream stream = new ByteArrayOutputStream(); try { while ((ch = in.read()) != -1) { stream.write(ch); } b = stream.toByteArray(); stream.reset(); stream.close(); in.close(); } catch (Exception e) { e.printStackTrace(); } in = null; stream = null; return b; } public static void main(String[] args) { //正则表达式 String regex = "(?s)<table cellpadding=\"0\" cellspacing=\"0\" class=\"result\" id=\"\\d+\".*><tr><td class=f>.*<h3 class=\"t\"><a.*href=\"(.*?)\".*target=\"_blank\".*>(.*?)</a>(.*?)<br>.*<span class=\"g\">(.*?)</span>"; //获取该网页地址的html源代码 String html = getUrlHtml("http://www.baidu.com/s?wd=火车票&pn=0&rn=100&usm=1"); //这里卡很久,而且只出1个值,搞了几天无解。。。。 ArrayDeque<String[]> Q = regexAllGroups(html, regex); System.out.println(Q.size()); } }
复制代码
作者:
范德农
时间:
2012-12-24 14:03
/**
* <p>Title: </p>
*
* <p>Description: </p>
*
* <p>Copyright: Copyright (c) 2012</p>
*
* <p>Company: </p>
*
* @author not attributable
* @version 1.0
*/
import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;
import java.util.zip.*;
import com.tcsos.util.Regexer;
public class test {
//返回所有组的正则
/**
 * Collects the capture groups of every match of {@code regex} in {@code original}.
 *
 * <p>The pattern is compiled with {@link Pattern#CASE_INSENSITIVE}. Each match contributes one
 * {@code String[]} whose element {@code i - 1} holds capture group {@code i}; group 0 (the whole
 * match) is not included. A group that did not participate in a match is stored as {@code null},
 * mirroring {@link Matcher#group(int)}.
 *
 * @param original text to search; {@code null} yields an empty result
 * @param regex regular expression to apply; {@code null} yields an empty result
 * @return the matches in the order they were found; empty when either argument is {@code null},
 *     when the pattern declares no capture groups, or when nothing matches
 * @throws java.util.regex.PatternSyntaxException if {@code regex} is not a valid pattern
 */
public static ArrayDeque<String[]> regexAllGroups(String original, String regex) {
    ArrayDeque<String[]> results = new ArrayDeque<String[]>();
    if (original == null || regex == null) {
        return results;
    }
    Matcher matcher = Pattern.compile(regex, Pattern.CASE_INSENSITIVE).matcher(original);
    // The group count is a property of the pattern, so decide once instead of per match.
    int groupCount = matcher.groupCount();
    if (groupCount < 1) {
        return results;
    }
    // NOTE: the original author reported find() appearing to hang here. The time spent in find()
    // is determined by the pattern (e.g. several greedy ".*" under DOTALL), not by this loop.
    while (matcher.find()) {
        String[] groups = new String[groupCount];
        for (int i = 1; i <= groupCount; i++) {
            // group(i) is null for a group that did not participate; keep it as null rather than
            // wrapping it in new String(...), which would throw NullPointerException.
            groups[i - 1] = matcher.group(i);
        }
        results.add(groups);
    }
    return results;
}
复制代码
欢迎光临 黑马程序员技术交流社区 (http://bbs.itheima.com/)
黑马程序员IT技术论坛 X3.2