|
What this is
This file is included in the DevDaily.com "Java Source Code Warehouse"
project. The intent of this project is to help you "Learn Java by
Example" (TM).
Other links
The source code
/*
* Sun Public License Notice
*
* The contents of this file are subject to the Sun Public License
* Version 1.0 (the "License"). You may not use this file except in
* compliance with the License. A copy of the License is available at
* http://www.sun.com/
*
* The Original Code is NetBeans. The Initial Developer of the Original
* Code is Sun Microsystems, Inc. Portions Copyright 1997-2003 Sun
* Microsystems, Inc. All Rights Reserved.
*/
package org.netbeans.modules.tasklist.providers;
import org.openide.ErrorManager;
import java.io.*;
/**
* XML uses inband encoding detection - this class obtains it.
*
* @author Petr Kuzel
* @version 1.0
*/
final class XMLEncodingHelper extends Object {
//
// taken from XML module xml.core.lib.EncodingHelper
//
// heuristic constant guessing max prolog length
private static final int EXPECTED_PROLOG_LENGTH = 1000;
/** Detect input stream encoding.
* The stream stays intact.
* @return IANA encoding name or Java historical name ("UTF8", "ASCII", etc.), or null
* if the stream is not markable or the encoding cannot be detected.
*/
public static String detectEncoding(InputStream in) throws IOException {
if (! in.markSupported()) {
ErrorManager.getDefault().log("XMLEncodingHelper got unmarkable stream: " + in.getClass()); // NOI18N
return null;
}
try {
in.mark(EXPECTED_PROLOG_LENGTH);
byte[] bytes = new byte[EXPECTED_PROLOG_LENGTH];
for (int i = 0; inull for unrecognized
*/
static String autoDetectEncoding(byte[] buf) throws IOException {
if (buf.length >= 4) {
switch (buf[0]) {
case 0:
// byte order mark of (1234-big endian) or (2143) UCS-4
// or '<' encoded as UCS-4 (1234, 2143, 3412) or UTF-16BE
if (buf[1] == (byte)0x3c && buf[2] == (byte)0x00 && buf[3] == (byte)0x3f) {
return "UnicodeBigUnmarked"; // NOI18N
}
// else it's probably UCS-4
break;
case 0x3c:
switch (buf[1]) {
// First character is '<'; could be XML without
// an XML directive such as "", " |