International friendly string comparison with case-order : Unicode « I18N « Java

Home
Java
1.2D Graphics GUI
2.3D
3.Advanced Graphics
4.Ant
5.Apache Common
6.Chart
7.Class
8.Collections Data Structure
9.Data Type
10.Database SQL JDBC
11.Design Pattern
12.Development Class
13.EJB3
14.Email
15.Event
16.File Input Output
17.Game
18.Generics
19.GWT
20.Hibernate
21.I18N
22.J2EE
23.J2ME
24.JavaFX
25.JDK 6
26.JDK 7
27.JNDI LDAP
28.JPA
29.JSP
30.JSTL
31.Language Basics
32.Network Protocol
33.PDF RTF
34.Reflection
35.Regular Expressions
36.Scripting
37.Security
38.Servlets
39.Spring
40.Swing Components
41.Swing JFC
42.SWT JFace Eclipse
43.Threads
44.Tiny Application
45.Velocity
46.Web Services SOA
47.XML
Java » I18N » Unicode 




International friendly string comparison with case-order
     
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the  "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * $Id: StringComparable.java 468655 2006-10-28 07:12:06Z minchau $
 */


import java.util.Vector;
import java.text.Collator;
import java.text.RuleBasedCollator;
import java.text.CollationElementIterator;
import java.util.Locale;
import java.text.CollationKey;


/**
* International friendly string comparison with case-order
 @author Igor Hersht, [email protected]
*/
public class StringComparable implements Comparable  {
    
     public final static int UNKNOWN_CASE = -1;
     public final static int UPPER_CASE = 1;
     public final static int LOWER_CASE = 2;
     
     private  String m_text;
     private  Locale m_locale;
     private RuleBasedCollator m_collator;
     private String m_caseOrder;
     private int m_mask = 0xFFFFFFFF
     
    public StringComparable(final String text, final Locale locale, final Collator collator, final String caseOrder){
         m_text =  text;
         m_locale = locale;
         m_collator = (RuleBasedCollator)collator;
         m_caseOrder = caseOrder;
         m_mask = getMask(m_collator.getStrength());
    }
  
   public final static Comparable getComparatorfinal String text, final Locale locale, final Collator collator, final String caseOrder){
       if((caseOrder == null||(caseOrder.length() == 0)){// no case-order specified
            return  ((RuleBasedCollator)collator).getCollationKey(text);
       }else{
            return new StringComparable(text, locale, collator, caseOrder);
       }       
   }
   
   public final String toString(){return m_text;}
   
   public int compareTo(Object o) {
   final String pattern = ((StringComparable)o).toString();
   if(m_text.equals(pattern)){//Code-point equals 
      return 0;
   }
   final int savedStrength = m_collator.getStrength()
   int comp = 0;
      // Is there difference more significant than case-order?     
     if(((savedStrength == Collator.PRIMARY|| (savedStrength == Collator.SECONDARY))){  
         comp = m_collator.compare(m_text, pattern );     
     }else{// more than SECONDARY
         m_collator.setStrength(Collator.SECONDARY);
         comp = m_collator.compare(m_text, pattern );
         m_collator.setStrength(savedStrength);
     }
     if(comp != 0){//Difference more significant than case-order 
        return comp ; 
     }      
        
      // No difference more significant than case-order.     
      // Find case difference
       comp = getCaseDiff(m_text, pattern);
       if(comp != 0){  
           return comp;
       }else{// No case differences. Less significant difference could exist 
            return m_collator.compare(m_text, pattern );
       }      
  }
  
 
  private final int getCaseDiff (final String text, final String pattern){
     final int savedStrength = m_collator.getStrength();
     final int savedDecomposition = m_collator.getDecomposition();
     m_collator.setStrength(Collator.TERTIARY);// not to ignore case  
     m_collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION );// corresponds NDF
     
    final int diff[] =getFirstCaseDiff (text, pattern, m_locale);
    m_collator.setStrength(savedStrength);// restore
    m_collator.setDecomposition(savedDecomposition)//restore
    if(diff != null){  
       if((m_caseOrder).equals("upper-first")){
            if(diff[0== UPPER_CASE){
                return -1;
            }else{
                return 1;
            }
       }else{// lower-first
            if(diff[0== LOWER_CASE){
                return -1;
            }else{
                return 1;
            }
       }
   }else{// No case differences
        return 0;
   }
    
  }
  
   
    
  private final int[] getFirstCaseDiff(final String text, final String pattern, final Locale locale){
        
        final CollationElementIterator targIter = m_collator.getCollationElementIterator(text);
        final CollationElementIterator patIter = m_collator.getCollationElementIterator(pattern);  
        int startTarg = -1;
        int endTarg = -1;
        int startPatt = -1;
        int endPatt = -1;
        final int done = getElement(CollationElementIterator.NULLORDER);
        int patternElement = 0, targetElement = 0;
        boolean getPattern = true, getTarget = true;
        
        while (true) { 
            if (getPattern){
                 startPatt = patIter.getOffset();
                 patternElement = getElement(patIter.next());
                 endPatt = patIter.getOffset();
            }
            if ((getTarget)){               
                 startTarg  = targIter.getOffset()
                 targetElement   = getElement(targIter.next())
                 endTarg  = targIter.getOffset();
            }
            getTarget = getPattern = true;                          
            if ((patternElement == done||targetElement == done)) {
                return null;                      
            else if (targetElement == 0) {
              getPattern = false;           
            else if (patternElement == 0) {
              getTarget = false;           
            else if (targetElement != patternElement) {// mismatch
                if((startPatt < endPatt&& (startTarg < endTarg)){
                    final String  subText = text.substring(startTarg, endTarg);
                    final String  subPatt = pattern.substring(startPatt, endPatt);
                    final String  subTextUp = subText.toUpperCase(locale);
                    final String  subPattUp = subPatt.toUpperCase(locale);
                    if(m_collator.compare(subTextUp, subPattUp!= 0){ // not case diffference
                        continue;
                    }
                    
                    int diff[] {UNKNOWN_CASE, UNKNOWN_CASE};
                    if(m_collator.compare(subText, subTextUp== 0){
                        diff[0= UPPER_CASE;
                    }else if(m_collator.compare(subText, subText.toLowerCase(locale)) == 0){
                       diff[0= LOWER_CASE; 
                    }
                    if(m_collator.compare(subPatt, subPattUp== 0){
                        diff[1= UPPER_CASE;
                    }else if(m_collator.compare(subPatt, subPatt.toLowerCase(locale)) == 0){
                       diff[1= LOWER_CASE; 
                    }
                    
                    if(((diff[0== UPPER_CASE&& diff[1== LOWER_CASE)) ||
                       ((diff[0== LOWER_CASE&& diff[1== UPPER_CASE))){        
                        return diff;
                    }else{// not case diff
                      continue
                    }  
                }else{
                    continue
                }
                    
           }
        }
                           
  }
  
  
 // Return a mask for the part of the order we're interested in
    private static final int getMask(final int strength) {
        switch (strength) {
            case Collator.PRIMARY:
                return 0xFFFF0000;
            case Collator.SECONDARY:
                return 0xFFFFFF00;
            default
                return 0xFFFFFFFF;
        }
    }
    //get collation element with given strength
    // from the element with max strength
  private final int getElement(int maxStrengthElement){
    
    return (maxStrengthElement & m_mask);
  }  

}//StringComparable 

   
    
    
    
    
  














Related examples in the same category
1.Unicode DisplayUnicode Display
2.Character Sets and Unicode: Code Set Conversion
3.Display "special character" using Unicode
4.Generic unicode textreader, which will use BOM mark to identify the encoding to be used. If BOM is not found then use a given default or system encoding.
5.Convert into Hexadecimal notation of Unicode
6.Generic Unicode text reader, which uses a BOM (Byte Order Mark) to identify the encoding to be used.
7.Generic unicode text reader.
8.processing SGML into unicode characters.
9.Write a 16 bit short as LITTLE_ENDIAN
10.Write a 32 bit int as LITTLE_ENDIAN.
11.Arabic Reshaper
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.