本帖最后由 孙胜 于 2013-5-7 22:41 编辑
以下代码,获取UTF-8网站,如:baidu, hao120,
GBK网站,如 sina, qq, tudou, youku 等,都没有任何问题。
为什么单单sohu网站就是乱码?
- import java.io.*;
- import java.net.*;
/**
 * Downloads the resource behind a URL and stores its bytes in a local file.
 *
 * <p>Constructing an instance performs the download immediately. The body is
 * copied byte-for-byte (no charset conversion), except that a response whose
 * {@code Content-Encoding} header is {@code gzip} is decompressed first.
 */
public class MyIE {
    /** Size of the copy buffer, in bytes. */
    private static final int BUFFER_SIZE = 1024;

    /** Location of the resource to download. */
    private final URL url;

    /** Destination the downloaded bytes are written to. */
    private final File file;

    /**
     * Demo entry point: saves the sohu.com front page as {@code sohu.html}
     * in the working directory.
     *
     * @param args ignored
     * @throws Exception if the download fails
     */
    public static void main(String[] args) throws Exception {
        new MyIE("http://www.sohu.com", "sohu.html");
    }

    /**
     * Downloads {@code url} into {@code file}.
     *
     * @param url  textual URL of the resource to fetch
     * @param file path of the file to create or overwrite
     * @throws IllegalArgumentException if {@code url} is not a well-formed URL
     * @throws IllegalStateException    if connecting, reading or writing fails
     */
    public MyIE(String url, String file) {
        try {
            this.url = new URL(url);
        } catch (MalformedURLException e) {
            // Fail here with a clear message instead of continuing with a
            // null URL and hitting a NullPointerException later.
            throw new IllegalArgumentException("Malformed URL: " + url, e);
        }
        this.file = new File(file);

        downByByte();
    }

    /**
     * Copies the response body to {@link #file} in {@value #BUFFER_SIZE}-byte
     * chunks. Both streams are closed when the copy ends, whether it
     * succeeded or failed.
     *
     * @throws IllegalStateException wrapping the underlying {@link IOException}
     *         if any step of the transfer fails
     */
    private void downByByte() {
        try {
            URLConnection conn = url.openConnection();
            // try-with-resources closes BOTH streams; closing the input
            // stream is what releases the underlying connection.
            try (InputStream in = openBody(conn);
                 OutputStream out = new BufferedOutputStream(new FileOutputStream(file))) {
                byte[] buf = new byte[BUFFER_SIZE];
                int len;
                while ((len = in.read(buf)) != -1) {
                    out.write(buf, 0, len);
                }
            }
        } catch (IOException e) {
            throw new IllegalStateException("Failed to download " + url + " to " + file, e);
        }
    }

    /**
     * Opens the response body of {@code conn}, transparently decompressing it
     * when the server declares a gzip content encoding.
     *
     * <p>{@link URLConnection} hands back the body exactly as transmitted, so
     * a gzip-compressed response written straight to disk is unreadable.
     * NOTE(review): this is presumably why www.sohu.com came out garbled while
     * other sites did not; confirm against the actual response headers.
     *
     * @param conn connection to read from
     * @return a buffered stream yielding the decoded body
     * @throws IOException if the connection cannot be read or the gzip header
     *         is invalid
     */
    private static InputStream openBody(URLConnection conn) throws IOException {
        InputStream raw = new BufferedInputStream(conn.getInputStream());
        String encoding = conn.getContentEncoding();
        if (encoding != null) {
            String token = encoding.trim();
            if ("gzip".equalsIgnoreCase(token) || "x-gzip".equalsIgnoreCase(token)) {
                try {
                    return new java.util.zip.GZIPInputStream(raw);
                } catch (IOException e) {
                    // The wrapper never took ownership, so release the raw
                    // stream ourselves before propagating.
                    raw.close();
                    throw e;
                }
            }
        }
        return raw;
    }
}
复制代码 |