Get file encoding : Encoding « I18N « Java

Home
Java
1.2D Graphics GUI
2.3D
3.Advanced Graphics
4.Ant
5.Apache Common
6.Chart
7.Class
8.Collections Data Structure
9.Data Type
10.Database SQL JDBC
11.Design Pattern
12.Development Class
13.EJB3
14.Email
15.Event
16.File Input Output
17.Game
18.Generics
19.GWT
20.Hibernate
21.I18N
22.J2EE
23.J2ME
24.JDK 6
25.JNDI LDAP
26.JPA
27.JSP
28.JSTL
29.Language Basics
30.Network Protocol
31.PDF RTF
32.Reflection
33.Regular Expressions
34.Scripting
35.Security
36.Servlets
37.Spring
38.Swing Components
39.Swing JFC
40.SWT JFace Eclipse
41.Threads
42.Tiny Application
43.Velocity
44.Web Services SOA
45.XML
Java Tutorial
Java Book
Java Source Code / Java Documentation
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
SCJP
Java » I18N » Encoding | Screenshots
Get file encoding
     
/*
 * Copyright (c) 2006, Chuck Mortimore - xmldap.org
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the names xmldap, xmldap.org, xmldap.com nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


/*
 
 * Functions to read XML files considering byte order marks
 
 */
//package org.xmldap.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

/**
 * Helpers for reading XML files whose content may start with a Unicode byte
 * order mark (BOM).
 *
 * <p>Recognised BOMs:
 * <ul>
 *   <li>{@code FE FF} - UTF-16, big endian</li>
 *   <li>{@code FF FE} - UTF-16, little endian</li>
 *   <li>{@code EF BB BF} - UTF-8</li>
 * </ul>
 * UTF-32 BOMs are not distinguished; {@code FF FE 00 00} is reported as
 * UTF-16LE.
 */
public class XmlFileUtil {

  private static final String UTF_8 = "UTF-8";
  private static final String UTF_16BE = "UTF-16BE";
  private static final String UTF_16LE = "UTF-16LE";

  /** Size of the character buffer used by {@link #doRead(InputStream)}. */
  private static final int READ_BUFFER_CHARS = 2048;

  /**
   * Returns the number of bytes occupied by a byte order mark at the start of
   * the given data.
   *
   * @param b the raw bytes to inspect; {@code null} is treated as empty
   * @return 2 for a UTF-16 BOM, 3 for a UTF-8 BOM, 0 if there is no BOM
   */
  public static int getBomLength(byte[] b) {
    final String encoding = getEncoding(b);
    if (encoding == null) {
      return 0;
    }
    return UTF_8.equals(encoding) ? 3 : 2;
  }

  /**
   * Determines the encoding announced by a byte order mark at the start of
   * the given data.
   *
   * @param b the raw bytes to inspect; {@code null} is treated as empty
   * @return {@code "UTF-16BE"}, {@code "UTF-16LE"} or {@code "UTF-8"}, or
   *         {@code null} if the data does not start with one of these BOMs
   */
  public static String getEncoding(byte[] b) {
    // A BOM needs at least two bytes; an array holding exactly the BOM counts.
    if (b == null || b.length < 2) {
      return null;
    }

    // Mask to get unsigned values; Java bytes are signed.
    final int b0 = b[0] & 0xFF;
    final int b1 = b[1] & 0xFF;

    if (b0 == 0xFE && b1 == 0xFF) {
      return UTF_16BE;
    }
    if (b0 == 0xFF && b1 == 0xFE) {
      return UTF_16LE;
    }
    if (b.length >= 3 && b0 == 0xEF && b1 == 0xBB && (b[2] & 0xFF) == 0xBF) {
      return UTF_8;
    }
    return null;
  }

  /**
   * Removes the byte order mark from the stream, if it exists, and returns
   * the encoding name.
   *
   * <p>Adapted from org/apache/xerces/xinclude/XIncludeTextReader.java.
   *
   * <p>When a BOM is found the stream is left positioned directly after it.
   * When no BOM is found {@code null} is returned; if the stream supports
   * mark/reset it is reset to its initial position, otherwise up to three
   * bytes have been consumed and the caller has to reopen or otherwise
   * rewind the stream.
   *
   * @param stream the stream to inspect
   * @return {@code "UTF-16BE"}, {@code "UTF-16LE"} or {@code "UTF-8"}, or
   *         {@code null} if the stream does not start with one of these BOMs
   * @throws IOException if reading from or resetting the stream fails
   */
  public static String getEncoding(InputStream stream) throws IOException {
    // reset() throws on streams without mark support, so only use the
    // mark/reset pair when it is actually available.
    final boolean canReset = stream.markSupported();
    if (canReset) {
      stream.mark(4);
    }

    // Single-byte reads block until a byte is available or the stream ends,
    // so a short read cannot make us miss a BOM.
    final int b0 = stream.read();
    final int b1 = (b0 < 0) ? -1 : stream.read();

    if (b0 == 0xFE && b1 == 0xFF) {
      return UTF_16BE;
    }
    if (b0 == 0xFF && b1 == 0xFE) {
      return UTF_16LE;
    }
    if (b0 == 0xEF && b1 == 0xBB && stream.read() == 0xBF) {
      return UTF_8;
    }

    // Not a BOM: hand the bytes back where possible.
    if (canReset) {
      stream.reset();
    }
    return null;
  }

  /**
   * Reads the remaining content of the stream into a string and closes the
   * stream.
   *
   * <p>Bytes are decoded with the platform default charset.
   *
   * @param in the stream to read; it is closed before this method returns
   * @return the decoded content, possibly empty
   * @throws IOException if reading from the stream fails
   */
  static public String doRead(InputStream in) throws IOException {
    BufferedReader ins = new BufferedReader(new InputStreamReader(in));
    StringBuilder sb = new StringBuilder();
    try {
      // InputStream.available() is not a reliable size hint, so a fixed-size
      // buffer is used instead.
      char[] charBuf = new char[READ_BUFFER_CHARS];
      int c;
      while ((c = ins.read(charBuf, 0, charBuf.length)) != -1) {
        sb.append(charBuf, 0, c);
      }
    } finally {
      try {
        // Closing the reader also closes the wrapped input stream.
        ins.close();
      } catch (IOException ignored) {
        // Best effort: a failure while closing must not mask a read error
        // or discard content that was already read.
      }
    }
    return sb.toString();
  }

}

   
    
    
    
    
  
Related examples in the same category
1.Convert Encoding
2.Utility class for working with character sets
3.Utility methods for ASCII character checking.
4.Reader for UCS-2 and UCS-4 encodings. (i.e., encodings from ISO-10646-UCS-(2|4)).
5.Conversions between IANA encoding names and Java encoding names, and vice versa.
6.ASCII character handling functions
7.This class represents an encoding.
8.Provides information about encodings.
9.Codec for the Quoted-Printable section of http://www.ietf.org/rfc/rfc1521.txt (RFC 1521)
10.ISO 8859-8, ASCII plus Hebrew
11.TIS-620 does not have the non-breaking space or the C1 controls.
12.ISO-8859-1; a.k.a. Latin-1
13.ISO 8859-2, a.k.a. Latin-2
14.ISO 8859-3
15.ISO 8859-4, Latin plus the characters needed for Greenlandic, Icelandic, and Lappish.
16.ISO 8859-9 for Turkish.
17.ISO-8859-10, for Lithuanian, Estonian, Greenlandic, Icelandic, Inuit, Lappish, and other Northern European languages.
18.ISO-8859-13, for Latvian and other Baltic languages.
19.ISO-8859-14, for Gaelic, Welsh, and other Celtic languages.
20.ISO 8859-9 for Western Europe. Includes the Euro sign and several uncommon French letters
21.ISO 8859-16, Romanian
22.ASCII Writer
23.UCS Writer
24.Unicode Writer
25.Whether a character is or is not available in a particular encoding
26.ISO 8859-6, ASCII plus Arabic
27.ISO 8859-5, ASCII plus Cyrillic (Russian, Byelorussian, etc.)
28.ISO 8859-7, ASCII plus Greek
29.IANA to Java Mapping
30.Java to IANA Mapping
31.EncodingMap is a convenience class which handles conversions between IANA encoding names and Java encoding names, and vice versa.
www.java2s.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.