黑马程序员技术交流社区
标题:
详细讲解编码和解码原理的一个程序
[打印本页]
作者:
wata
时间:
2015-1-13 16:49
标题:
详细讲解编码和解码原理的一个程序
package day21;
import java.io.*;
import java.util.*;
/*
* 编码:字符串(String)-->字节数组(byte[])
* str.getBytes(charsetName)
*
* 解码:字节数组(byte[])-->字符串(String)
* new String(byte[], charsetName)
*
* -------------------------------------
*
* 不指定字符集时,编码和解码使用平台默认字符集(中文Windows上通常是GBK,JDK 18起默认是UTF-8),
* GBK编码:一个汉字用两个字节表示(ASCII字符只占一个字节)
* UTF-8编码:常用汉字用三个字节表示(ASCII字符只占一个字节)
*
*/
/**
 * Demonstrates encoding a String into bytes and decoding bytes back into a
 * String with the GBK, UTF-8 and ISO-8859-1 charsets, including what happens
 * when bytes are decoded with the wrong charset.
 *
 * <p>Calls that take no charset argument use the platform default charset.
 * That is not necessarily GBK (JDK 18 and later default to UTF-8), so the demos
 * that rely on it print the default charset first.
 */
public class EncodeDemo {

    /**
     * Runs one of the demos.
     *
     * @param args optional; if the first element is {@code "bianma"} the encode
     *             demo runs, if it is {@code "jiema"} the decode demo runs.
     *             With no argument (or any other value) the round-trip demo
     *             {@link #bianma_jiema()} runs, as it always did.
     * @throws Exception if a requested charset is not supported by this JVM
     */
    public static void main(String[] args) throws Exception {
        String demo = (args != null && args.length > 0) ? args[0] : "";
        if ("bianma".equals(demo)) {
            bianma();
        } else if ("jiema".equals(demo)) {
            jiema();
        } else {
            bianma_jiema();
        }
    }

    /**
     * Recovers text that was decoded with the wrong charset by re-encoding it
     * with that same wrong charset and decoding the bytes again correctly.
     *
     * <p>Prints four lines: the GBK bytes, the wrongly decoded text, the
     * re-encoded bytes (identical to the first line) and the recovered text.
     * The second line consists of the Latin-1 characters U+00C4 U+00E3 U+00BA
     * U+00C3; a console that cannot render them may show {@code ????}.
     *
     * @throws UnsupportedEncodingException if GBK or ISO8859-1 is unavailable
     */
    public static void bianma_jiema() throws UnsupportedEncodingException {
        String str = "你好";

        // Encode with GBK: two bytes per Chinese character here.
        byte[] gbkBytes = str.getBytes("GBK");
        System.out.println(Arrays.toString(gbkBytes));

        // Decode with the wrong charset. ISO-8859-1 maps every byte value to
        // exactly one char, so no information is lost by this mistake.
        String misdecoded = new String(gbkBytes, "ISO8859-1");
        System.out.println(misdecoded);

        // Re-encode with the same wrong charset to get the original bytes back.
        byte[] restoredBytes = misdecoded.getBytes("ISO8859-1");
        System.out.println(Arrays.toString(restoredBytes));

        // Decode the restored bytes with the charset they were encoded in.
        String recovered = new String(restoredBytes, "GBK");
        System.out.println(recovered);
    }

    /**
     * Decode demo: decodes GBK bytes and UTF-8 bytes of the same text with the
     * platform default charset, GBK and UTF-8, and prints each result.
     *
     * <p>s2 and s6 use the matching charset and print the original text. s3 and
     * s5 use the mismatched charset and print garbled text. s1 and s4 depend on
     * the platform default charset, which is printed first.
     *
     * @throws UnsupportedEncodingException if GBK or UTF-8 is unavailable
     */
    public static void jiema() throws UnsupportedEncodingException {
        // s1 and s4 below can only be interpreted if the default is known.
        System.out.println("default charset = " + java.nio.charset.Charset.defaultCharset());

        String s = "你好";
        byte[] gbkBytes = s.getBytes("GBK");
        byte[] utf8Bytes = s.getBytes("UTF-8");

        String s1 = new String(gbkBytes);           // GBK bytes, platform default charset
        String s2 = new String(gbkBytes, "GBK");    // GBK bytes decoded as GBK
        String s3 = new String(gbkBytes, "UTF-8");  // GBK bytes decoded as UTF-8 (mismatch)
        String s4 = new String(utf8Bytes);          // UTF-8 bytes, platform default charset
        String s5 = new String(utf8Bytes, "GBK");   // UTF-8 bytes decoded as GBK (mismatch)
        String s6 = new String(utf8Bytes, "UTF-8"); // UTF-8 bytes decoded as UTF-8

        System.out.println("s1 = " + s1); // original text only if the default is GBK
        System.out.println("s2 = " + s2); // original text
        System.out.println("s3 = " + s3); // malformed input: replacement characters
        System.out.println("s4 = " + s4); // original text only if the default is UTF-8
        System.out.println("s5 = " + s5); // garbled: six bytes read as three GBK characters
        System.out.println("s6 = " + s6); // original text
    }

    /**
     * Encode demo: encodes the same text with the platform default charset,
     * GBK and UTF-8, and prints the resulting byte arrays.
     *
     * @throws UnsupportedEncodingException if GBK or UTF-8 is unavailable
     */
    public static void bianma() throws UnsupportedEncodingException {
        // The first array below matches either the GBK or the UTF-8 one only
        // when the default charset happens to be that charset.
        System.out.println("default charset = " + java.nio.charset.Charset.defaultCharset());

        String s = "你好";
        byte[] defaultBytes = s.getBytes();     // platform default charset
        byte[] gbkBytes = s.getBytes("GBK");    // two bytes per character for this text
        byte[] utf8Bytes = s.getBytes("UTF-8"); // three bytes per character for this text

        System.out.println(Arrays.toString(defaultBytes)); // depends on the default charset
        System.out.println(Arrays.toString(gbkBytes));     // [-60, -29, -70, -61]
        System.out.println(Arrays.toString(utf8Bytes));    // [-28, -67, -96, -27, -91, -67]
    }
}
复制代码
作者:
wata
时间:
2015-1-13 17:19
补充:
package day21;
/*
字符“联通”的字节二进制编码是:
11000001
10101010
11001101
10101000
刚好符合UTF-8两字节字符的编码格式(110xxxxx 10xxxxxx),
所以按内容猜测编码的程序会误用UTF-8来解码,于是出现乱码
---------------------------
UTF-8的编码机制(modified UTF-8)在API文档java.io包的DataInput接口中有说明
*/
/**
 * Prints the GBK encoding of a two-character sample string, one byte per line,
 * as a binary number.
 */
public class EncodeDemo2 {

    /**
     * Encodes the sample string with GBK and prints each byte in binary.
     *
     * @param args ignored
     * @throws Exception if the GBK charset is not supported by this JVM
     */
    public static void main(String[] args) throws Exception {
        byte[] encoded = "联通".getBytes("GBK");
        for (int i = 0; i < encoded.length; i++) {
            // A byte widens to a sign-extended 32-bit int; masking with 0xFF
            // keeps only the low 8 bits so negative bytes print as 8 digits.
            int lowEightBits = encoded[i] & 0xFF;
            System.out.println(Integer.toBinaryString(lowEightBits));
        }
    }
}
复制代码
欢迎光临 黑马程序员技术交流社区 (http://bbs.itheima.com/)
黑马程序员IT技术论坛 X3.2