|
What this is
This file is included in the DevDaily.com
"Java Source Code
Warehouse" project. The intent of this project is to help you "Learn
Java by Example" TM.
Other links
The source code
/*
* Sun Public License Notice
*
* The contents of this file are subject to the Sun Public License
* Version 1.0 (the "License"). You may not use this file except in
* compliance with the License. A copy of the License is available at
* http://www.sun.com/
*
* The Original Code is NetBeans. The Initial Developer of the Original
* Code is Sun Microsystems, Inc. Portions Copyright 1997-2003 Sun
* Microsystems, Inc. All Rights Reserved.
*/
package org.netbeans.modules.xml.core.lib;
import java.io.*;
import javax.swing.text.*;
/**
* XML uses inband encoding detection - this class obtains it.
*
* @author Petr Kuzel
* @version 1.0
*/
public class EncodingHelper extends Object {
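// Heuristic guess at the maximum XML prolog length, in bytes; used both as the
// mark() read limit and as the size of the look-ahead buffer in detectEncoding.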
private static final int EXPECTED_PROLOG_LENGTH = 1000;
/** Detect input stream encoding.
* The stream stays intact.
* @return java encoding names ("UTF8", "ASCII", etc.) or null
* if the stream is not markable or encoding cannot be detected.
*/
public static String detectEncoding(InputStream in) throws IOException {
if (! in.markSupported()) {
if ( Util.THIS.isLoggable() ) /* then */ Util.THIS.debug("EncodingHelper got unmarkable stream: " + in.getClass()); // NOI18N
return null;
}
try {
in.mark(EXPECTED_PROLOG_LENGTH);
byte[] bytes = new byte[EXPECTED_PROLOG_LENGTH];
for (int i = 0; inull for unrecognized
*/
static String autoDetectEncoding(byte[] buf) throws IOException {
if (buf.length >= 4) {
switch (buf[0]) {
case 0:
// byte order mark of (1234-big endian) or (2143) UCS-4
// or '<' encoded as UCS-4 (1234, 2143, 3412) or UTF-16BE
if (buf[1] == (byte)0x3c && buf[2] == (byte)0x00 && buf[3] == (byte)0x3f) {
return "UnicodeBigUnmarked";
}
// else it's probably UCS-4
break;
case 0x3c:
switch (buf[1]) {
// First character is '<'; could be XML without
// an XML directive such as "", " |