Source for java.text.CollationElementIterator

   1: /* CollationElementIterator.java -- Walks through collation elements
   2:    Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004  Free Software Foundation
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10:  
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.text;
  40: 
  41: import java.util.ArrayList;
  42: 
  43: /* Written using "Java Class Libraries", 2nd edition, plus online
  44:  * API docs for JDK 1.2 from http://www.javasoft.com.
  45:  * Status: Believed complete and correct to JDK 1.1.
  46:  */
  47: 
  48: /**
  49:  * This class walks through the character collation elements of a 
  50:  * <code>String</code> as defined by the collation rules in an instance of 
  51:  * <code>RuleBasedCollator</code>.  There is no public constructor for
  52:  * this class.  An instance is created by calling the
  53:  * <code>getCollationElementIterator</code> method on 
  54:  * <code>RuleBasedCollator</code>.
  55:  *
  56:  * @author Aaron M. Renn (arenn@urbanophile.com)
  57:  * @author Tom Tromey (tromey@cygnus.com)
  58:  * @author Guilhem Lavaux (guilhem.lavaux@free.fr)
  59:  */
  60: public final class CollationElementIterator
  61: {
  62:   /**
  63:    * This is a constant value that is returned to indicate that the end of 
  64:    * the string was encountered.
  65:    */
  66:   public static final int NULLORDER = -1;
  67: 
  68:   /**
  69:    * This is the RuleBasedCollator this object was created from.
  70:    */
  71:   RuleBasedCollator collator;
  72: 
  73:   /**
  74:    * This is the String that is being iterated over.
  75:    */
  76:   String text;
  77: 
  78:   /**
  79:    * This is the index into the collation decomposition where we are currently scanning.
  80:    */
  81:   int index;
  82: 
  83:   /**
  84:    * This is the index into the String where we are currently scanning.
  85:    */
  86:   int textIndex;
  87: 
  88:   /**
  89:    * Array containing the collation decomposition of the
  90:    * text given to the constructor.
  91:    */
  92:   private RuleBasedCollator.CollationElement[] text_decomposition;
  93: 
  94:   /**
  95:    * Array containing the index of the specified block.
  96:    */
  97:   private int[] text_indexes;
  98: 
  99:   /**
 100:    * This method initializes a new instance of <code>CollationElementIterator</code>
 101:    * to iterate over the specified <code>String</code> using the rules in the
 102:    * specified <code>RuleBasedCollator</code>.
 103:    *
 104:    * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
 105:    * @param text The <code>String</code> to iterate over.
 106:    */
 107:   CollationElementIterator(RuleBasedCollator collator, String text)
 108:   {
 109:     this.collator = collator;
 110:     
 111:     setText (text);    
 112:   }
 113: 
 114:   RuleBasedCollator.CollationElement nextBlock()
 115:   {
 116:     if (index >= text_decomposition.length)
 117:       return null;
 118:     
 119:     RuleBasedCollator.CollationElement e = text_decomposition[index];
 120:     
 121:     textIndex = text_indexes[index+1];
 122: 
 123:     index++;
 124: 
 125:     return e;
 126:   }
 127: 
 128:   RuleBasedCollator.CollationElement previousBlock()
 129:   {
 130:     if (index == 0)
 131:       return null;
 132:     
 133:     index--;
 134:     RuleBasedCollator.CollationElement e = text_decomposition[index];
 135: 
 136:     textIndex = text_indexes[index+1];
 137:     
 138:     return e;
 139:   }
 140: 
 141:   /**
 142:    * This method returns the collation ordering value of the next character sequence
 143:    * in the string (it may be an extended character following collation rules).
 144:    * This method will return <code>NULLORDER</code> if the
 145:    * end of the string was reached.
 146:    *
 147:    * @return The collation ordering value.
 148:    */
 149:   public int next()
 150:   {
 151:     RuleBasedCollator.CollationElement e = nextBlock();
 152: 
 153:     if (e == null)
 154:       return NULLORDER;
 155:     
 156:     return e.getValue();
 157:   }
 158: 
 159:   /**
 160:    * This method returns the collation ordering value of the previous character
 161:    * in the string.  This method will return <code>NULLORDER</code> if the
 162:    * beginning of the string was reached.
 163:    *
 164:    * @return The collation ordering value.
 165:    */
 166:   public int previous()
 167:   {
 168:     RuleBasedCollator.CollationElement e = previousBlock();
 169: 
 170:     if (e == null)
 171:       return NULLORDER;
 172:     
 173:     return e.getValue();
 174:   }
 175: 
 176:   /**
 177:    * This method returns the primary order value for the given collation
 178:    * value.
 179:    *
 180:    * @param order The collation value returned from <code>next()</code> or 
 181:    *              <code>previous()</code>.
 182:    *
 183:    * @return The primary order value of the specified collation value.  This is
 184:    *         the high 16 bits.
 185:    */
 186:   public static int primaryOrder(int order)
 187:   {
 188:     // From the JDK 1.2 spec.
 189:     return order >>> 16;
 190:   }
 191: 
 192:   /**
 193:    * This method resets the internal position pointer to read from the
 194:    * beginning of the <code>String</code> again.
 195:    */
 196:   public void reset()
 197:   {
 198:     index = 0;
 199:     textIndex = 0;
 200:   }
 201: 
 202:   /**
 203:    * This method returns the secondary order value for the given collation
 204:    * value.
 205:    *
 206:    * @param order The collation value returned from <code>next()</code> or 
 207:    *              <code>previous()</code>.
 208:    *
 209:    * @return The secondary order value of the specified collation value.  This 
 210:    *         is the bits 8-15.
 211:    */
 212:   public static short secondaryOrder(int order)
 213:   {
 214:     // From the JDK 1.2 spec.
 215:     return (short) ((order >>> 8) & 255);
 216:   }
 217: 
 218:   /**
 219:    * This method returns the tertiary order value for the given collation
 220:    * value.
 221:    *
 222:    * @param order The collation value returned from <code>next()</code> or 
 223:    *              <code>previous()</code>.
 224:    *
 225:    * @return The tertiary order value of the specified collation value.  This 
 226:    *         is the low eight bits.
 227:    */
 228:   public static short tertiaryOrder(int order)
 229:   {
 230:     // From the JDK 1.2 spec.
 231:     return (short) (order & 255);
 232:   }
 233: 
 234:   /**
 235:    * This method sets the <code>String</code> that it is iterating over
 236:    * to the specified <code>String</code>.
 237:    *
 238:    * @param text The new <code>String</code> to iterate over.
 239:    *
 240:    * @since 1.2
 241:    */
 242:   public void setText(String text)
 243:   {
 244:     int idx = 0;
 245:     int idx_idx = 0;
 246:     int alreadyExpanded = 0;
 247:     int idxToMove = 0;
 248: 
 249:     this.text = text;
 250:     this.index = 0;
 251: 
 252:     String work_text = text.intern();
 253: 
 254:     ArrayList a_element = new ArrayList();
 255:     ArrayList a_idx = new ArrayList();
 256: 
 257:     // Build element collection ordered as they come in "text".
 258:     while (idx < work_text.length())
 259:       {
 260:     String key, key_old;
 261: 
 262:     Object object = null;
 263:     int p = 1;
 264:     
 265:     // IMPROVE: use a TreeMap with a prefix-ordering rule.
 266:     key_old = key = null;
 267:     do
 268:       {
 269:         if (object != null)
 270:           key_old = key;
 271:         key = work_text.substring (idx, idx+p);
 272:         object = collator.prefix_tree.get (key);
 273:         if (object != null && idx < alreadyExpanded)
 274:           {
 275:         RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
 276:         if (prefix.expansion != null && 
 277:             prefix.expansion.startsWith(work_text.substring(0, idx)))
 278:         {
 279:           object = null;
 280:           key = key_old;
 281:         }
 282:           }
 283:         p++;
 284:       }
 285:     while (idx+p <= work_text.length());
 286:     
 287:     if (object == null)
 288:       key = key_old;
 289:     
 290:     RuleBasedCollator.CollationElement prefix =
 291:       (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
 292: 
 293:     /*
 294:      * First case: There is no such sequence in the database.
 295:      * We will have to build one from the context.
 296:      */
 297:     if (prefix == null)
 298:       {
 299:         /*
 300:          * We are dealing with sequences in an expansion. They
 301:          * are treated as accented characters (tertiary order).
 302:          */
 303:         if (alreadyExpanded > 0)
 304:           {
 305:         RuleBasedCollator.CollationElement e =
 306:           collator.getDefaultAccentedElement (work_text.charAt (idx));
 307:         
 308:         a_element.add (e);
 309:         a_idx.add (new Integer(idx_idx));
 310:         idx++;
 311:         alreadyExpanded--;
 312:         if (alreadyExpanded == 0)
 313:           {
 314:             /* There is not any characters left in the expansion set.
 315:              * We can increase the pointer in the source string.
 316:              */
 317:             idx_idx += idxToMove;
 318:             idxToMove = 0; 
 319:           }
 320:         else
 321:           idx_idx++;
 322:           }
 323:         else
 324:           {
 325:         /* This is a normal character. */
 326:         RuleBasedCollator.CollationElement e =
 327:           collator.getDefaultElement (work_text.charAt (idx));
 328:         Integer i_ref = new Integer(idx_idx);
 329: 
 330:         /* Don't forget to mark it as a special sequence so the
 331:          * string can be ordered.
 332:          */
 333:         a_element.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
 334:         a_idx.add (i_ref);
 335:         a_element.add (e);
 336:         a_idx.add (i_ref);
 337:         idx_idx++;
 338:         idx++;
 339:           }
 340:         continue;
 341:       }
 342:  
 343:     /*
 344:      * Second case: Here we have found a matching sequence.
 345:      * Here we have an expansion string prepend it to the "work text" and
 346:      * add the corresponding sorting element. We must also mark 
 347:      */
 348:     if (prefix.expansion != null)
 349:       {
 350:         work_text = prefix.expansion
 351:           + work_text.substring (idx+prefix.key.length());
 352:         idx = 0;
 353:         a_element.add (prefix);
 354:         a_idx.add (new Integer(idx_idx));
 355:         if (alreadyExpanded == 0)
 356:           idxToMove = prefix.key.length();
 357:         alreadyExpanded += prefix.expansion.length()-prefix.key.length();
 358:       }
 359:     else
 360:       {
 361:         /* Third case: the simplest. We have got the prefix and it
 362:          * has not to be expanded.
 363:          */
 364:         a_element.add (prefix);
 365:         a_idx.add (new Integer(idx_idx));
 366:         idx += prefix.key.length();
 367:         /* If the sequence is in an expansion, we must decrease the
 368:          * counter.
 369:          */
 370:         if (alreadyExpanded > 0)
 371:           {
 372:         alreadyExpanded -= prefix.key.length();
 373:         if (alreadyExpanded == 0)
 374:           {
 375:             idx_idx += idxToMove;
 376:             idxToMove = 0;
 377:           }
 378:           }
 379:         else
 380:           idx_idx += prefix.key.length();
 381:       }
 382:       }
 383:     
 384:     text_decomposition = (RuleBasedCollator.CollationElement[])
 385:        a_element.toArray(new RuleBasedCollator.CollationElement[a_element.size()]);
 386:     text_indexes = new int[a_idx.size()+1];
 387:     for (int i = 0; i < a_idx.size(); i++) 
 388:       {
 389:     text_indexes[i] = ((Integer)a_idx.get(i)).intValue();
 390:       }
 391:     text_indexes[a_idx.size()] = text.length();
 392:   }
 393: 
 394:   /**
 395:    * This method sets the <code>String</code> that it is iterating over
 396:    * to the <code>String</code> represented by the specified
 397:    * <code>CharacterIterator</code>.
 398:    *
 399:    * @param source The <code>CharacterIterator</code> containing the new
 400:    * <code>String</code> to iterate over.
 401:    */
 402:   public void setText(CharacterIterator source)
 403:   {
 404:     StringBuffer expand = new StringBuffer();
 405: 
 406:     // For now assume we read from the beginning of the string.
 407:     for (char c = source.first();
 408:      c != CharacterIterator.DONE;
 409:      c = source.next())
 410:       expand.append(c);
 411: 
 412:     setText(expand.toString());
 413:   }
 414: 
 415:   /**
 416:    * This method returns the current offset into the <code>String</code>
 417:    * that is being iterated over.
 418:    *
 419:    * @return The iteration index position.
 420:    *
 421:    * @since 1.2
 422:    */
 423:   public int getOffset()
 424:   {
 425:     return textIndex;
 426:   }
 427: 
 428:   /**
 429:    * This method sets the iteration index position into the current
 430:    * <code>String</code> to the specified value.  This value must not
 431:    * be negative and must not be greater than the last index position
 432:    * in the <code>String</code>.
 433:    *
 434:    * @param offset The new iteration index position.
 435:    *
 436:    * @exception IllegalArgumentException If the new offset is not valid.
 437:    */
 438:   public void setOffset(int offset)
 439:   {
 440:     if (offset < 0)
 441:       throw new IllegalArgumentException("Negative offset: " + offset);
 442: 
 443:     if (offset > (text.length() - 1))
 444:       throw new IllegalArgumentException("Offset too large: " + offset);
 445:     
 446:     for (index = 0; index < text_decomposition.length; index++)
 447:       {    
 448:     if (offset <= text_indexes[index])
 449:       break;
 450:       }
 451:     /*
 452:      * As text_indexes[0] == 0, we should not have to take care whether index is
 453:      * greater than 0. It is always.
 454:      */
 455:     if (text_indexes[index] == offset)
 456:       textIndex = offset;
 457:     else
 458:       textIndex = text_indexes[index-1];
 459:   }
 460: 
 461:   /**
 462:    * This method returns the maximum length of any expansion sequence that
 463:    * ends with the specified collation order value.  (Whatever that means).
 464:    *
 465:    * @param value The collation order value
 466:    *
 467:    * @return The maximum length of an expansion sequence.
 468:    */
 469:   public int getMaxExpansion(int value)
 470:   {
 471:     return 1;
 472:   }
 473: }