我們明明已經指定 UTF-8 了,不是嗎?這一切都是 BOM 的問題,不過那又是另一段故事了,欲知詳情請上 Google……
這裡單純說明在上列兩種情況的任一種之下,如何讓記事本「另存新檔」時一致顯示其編碼為「UTF-8」,下列兩個 class 供各位參考:
import java.io.IOException;
import java.io.PushbackInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import sun.nio.cs.StreamDecoder;
/**
 * A reader that inspects the first bytes of a stream for a Unicode byte-order
 * mark (BOM), picks the matching charset, and strips the BOM so callers never
 * see it. When no BOM is present the caller-supplied default encoding is used.
 *
 * <p>Recognized BOMs, checked in this order (UTF-32LE must be tested before
 * UTF-16LE because its BOM begins with the same two bytes):
 * <ul>
 *   <li>{@code 00 00 FE FF} - UTF-32BE</li>
 *   <li>{@code FF FE 00 00} - UTF-32LE</li>
 *   <li>{@code EF BB BF} - UTF-8</li>
 *   <li>{@code FE FF} - UTF-16BE</li>
 *   <li>{@code FF FE} - UTF-16LE</li>
 * </ul>
 *
 * <p>Decoding is delegated to a plain {@link InputStreamReader} wrapped around
 * the push-back stream, so this class does not need the JDK-internal
 * {@code sun.nio.cs.StreamDecoder}; a file-level import of that class is no
 * longer required by this class.
 */
public class UnicodeInputStreamReader extends InputStreamReader {

    /** Length in bytes of the longest BOM that is recognized (UTF-32). */
    private static final int BOM_SIZE = 4;

    /** Performs the actual decoding, using the charset chosen by {@link #init()}. */
    private final InputStreamReader decoder;

    /** Wraps the caller's stream so bytes read while sniffing can be returned to it. */
    private final PushbackInputStream pushBack;

    /** Charset name selected from the BOM, or the default encoding when there is none. */
    private String encode;

    /** Charset name to use when the stream carries no BOM. */
    private final String defaultEnc;

    /**
     * Creates a BOM-aware reader over {@code input}.
     *
     * <p>If sniffing the BOM fails with an {@link IOException}, the stack trace
     * is printed and the reader falls back to {@code defaultEnc}; the I/O
     * problem is expected to resurface on the first read.
     *
     * @param input      byte stream to decode
     * @param defaultEnc charset name used when no BOM is found; when
     *                   {@code null} the platform default charset is used for
     *                   decoding and {@link #getEncoding()} returns {@code null}
     * @throws UnsupportedEncodingException if the selected charset name is not supported
     */
    public UnicodeInputStreamReader(InputStream input, String defaultEnc) throws UnsupportedEncodingException {
        // The superclass's own decoder is never used for reading; every read is delegated.
        super(input);
        this.defaultEnc = defaultEnc;
        this.pushBack = new PushbackInputStream(input, BOM_SIZE);
        try {
            init();
        } catch (IOException e) {
            e.printStackTrace();
            // Honor the caller's choice instead of silently using the platform default.
            this.encode = defaultEnc;
        }
        String charsetName = (this.encode != null)
                ? this.encode
                : java.nio.charset.Charset.defaultCharset().name();
        this.decoder = new InputStreamReader(this.pushBack, charsetName);
    }

    /**
     * Reads up to {@link #BOM_SIZE} bytes, determines the encoding from them and
     * pushes back every byte that is not part of a BOM.
     *
     * <p>Reading is repeated until the buffer is full or the stream ends, because
     * a single {@code read} may legally return fewer bytes than requested. This
     * means construction may wait for up to four bytes on a slow stream.
     *
     * @throws IOException if reading from or pushing back to the stream fails
     */
    private void init() throws IOException {
        byte[] bom = new byte[BOM_SIZE];

        int n = 0;
        while (n < bom.length) {
            int r = this.pushBack.read(bom, n, bom.length - n);
            if (r <= 0) {
                break; // end of stream (or a stream that makes no progress)
            }
            n += r;
        }

        // Only bytes that were actually read take part in the comparison; the
        // unread tail of the buffer is zero-filled and must not match 00 bytes.
        int bomLength;
        if (hasPrefix(bom, n, 0x00, 0x00, 0xFE, 0xFF)) {
            this.encode = "UTF-32BE";
            bomLength = 4;
        } else if (hasPrefix(bom, n, 0xFF, 0xFE, 0x00, 0x00)) {
            this.encode = "UTF-32LE";
            bomLength = 4;
        } else if (hasPrefix(bom, n, 0xEF, 0xBB, 0xBF)) {
            this.encode = "UTF-8";
            bomLength = 3;
        } else if (hasPrefix(bom, n, 0xFE, 0xFF)) {
            this.encode = "UTF-16BE";
            bomLength = 2;
        } else if (hasPrefix(bom, n, 0xFF, 0xFE)) {
            this.encode = "UTF-16LE";
            bomLength = 2;
        } else {
            // No BOM: everything that was read is content and goes back.
            this.encode = this.defaultEnc;
            bomLength = 0;
        }

        int unread = n - bomLength;
        if (unread > 0) {
            this.pushBack.unread(bom, bomLength, unread);
        }
    }

    /**
     * Tells whether the first {@code available} bytes of {@code buf} start with
     * the given byte values.
     */
    private static boolean hasPrefix(byte[] buf, int available, int... signature) {
        if (available < signature.length) {
            return false;
        }
        for (int i = 0; i < signature.length; i++) {
            if ((buf[i] & 0xFF) != signature[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the charset name chosen at construction: the one implied by the
     * BOM, otherwise the default encoding passed to the constructor.
     */
    @Override
    public String getEncoding() {
        return this.encode;
    }

    /** Reads a single decoded character, or returns -1 at end of stream. */
    @Override
    public int read() throws IOException {
        return this.decoder.read();
    }

    /** Reads decoded characters into {@code cbuf}; returns the count, or -1 at end of stream. */
    @Override
    public int read(char cbuf[], int offset, int length) throws IOException {
        return this.decoder.read(cbuf, offset, length);
    }

    /**
     * Reads decoded characters into {@code target}. Delegated explicitly so that
     * this path also goes through the BOM-aware decoder rather than the
     * superclass's own decoder.
     */
    @Override
    public int read(java.nio.CharBuffer target) throws IOException {
        return this.decoder.read(target);
    }

    /** Tells whether a read can proceed without blocking, as reported by the delegate. */
    @Override
    public boolean ready() throws IOException {
        return this.decoder.ready();
    }

    /** Closes the delegate reader, which in turn closes the wrapped stream. */
    @Override
    public void close() throws IOException {
        this.decoder.close();
    }
}
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
/**
 * An {@link OutputStreamWriter} fixed to UTF-8 that writes the UTF-8 byte-order
 * mark ({@code EF BB BF}) to the target stream at construction time, before any
 * character data.
 *
 * <p>The mark is written unconditionally on every construction; the class does
 * not check whether the target already contains data.
 */
public class UTF8OutputStreamWriter extends OutputStreamWriter {

    /** The three bytes of the UTF-8 byte-order mark. */
    private static final byte[] UTF8_BOM = { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };

    /**
     * Creates a UTF-8 writer over {@code pos} and immediately writes the BOM to it.
     *
     * @param pos stream that receives the BOM followed by the encoded text
     * @throws IOException if writing the BOM fails
     */
    public UTF8OutputStreamWriter(OutputStream pos) throws IOException {
        super(pos, "UTF-8");
        emitBom(pos);
    }

    /**
     * Same as {@link #UTF8OutputStreamWriter(OutputStream)}.
     *
     * @param pos       stream that receives the BOM followed by the encoded text
     * @param pencoding ignored; output is always UTF-8
     * @throws IOException if writing the BOM fails
     */
    public UTF8OutputStreamWriter(OutputStream pos, String pencoding) throws IOException {
        this(pos);
    }

    /** Writes the BOM straight to the raw stream, handing it a private copy of the bytes. */
    private static void emitBom(OutputStream target) throws IOException {
        target.write(UTF8_BOM.clone());
    }
}