Source for java.text.Bidi

   1: /* Bidi.java -- Bidirectional Algorithm implementation
   2:    Copyright (C) 2005, 2006  Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10:  
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.text;
  40: 
  41: import java.awt.font.NumericShaper;
  42: import java.awt.font.TextAttribute;
  43: import java.util.ArrayList;
  44: 
  45: 
  46: /**
  47:  * Bidirectional Algorithm implementation.
  48:  *
  49:  * The full algorithm is
  50:  * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard
  51:  * Annex #9: The Bidirectional Algorithm</a>.
  52:  *
  53:  * @since 1.4
  54:  */
  55: public final class Bidi
  56: {
  57:   /**
  58:    * This indicates that a strongly directional character in the text should
  59:    * set the initial direction, but if no such character is found, then the
  60:    * initial direction will be left-to-right.
  61:    */
  62:   public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
  63: 
  64:   /**
  65:    * This indicates that a strongly directional character in the text should
  66:    * set the initial direction, but if no such character is found, then the
  67:    * initial direction will be right-to-left.
  68:    */
  69:   public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
  70: 
  71:   /**
  72:    * This indicates that the initial direction should be left-to-right.
  73:    */
  74:   public static final int DIRECTION_LEFT_TO_RIGHT = 0;
  75: 
  76:   /**
  77:    * This indicates that the initial direction should be right-to-left.
  78:    */
  79:   public static final int DIRECTION_RIGHT_TO_LEFT = 1;
  80: 
  81:   // Flags used when computing the result.
  82:   private static final int LTOR = 1 << DIRECTION_LEFT_TO_RIGHT;
  83:   private static final int RTOL = 1 << DIRECTION_RIGHT_TO_LEFT;
  84: 
  85:   // The text we are examining, and the starting offset.
  86:   // If we had a better way to handle createLineBidi, we wouldn't
  87:   // need this at all -- which for the String case would be an
  88:   // efficiency win.
  89:   private char[] text;
  90:   private int textOffset;
  91:   // The embeddings corresponding to the text, and the starting offset.
  92:   private byte[] embeddings;
  93:   private int embeddingOffset;
  94:   // The length of the text (and embeddings) to use.
  95:   private int length;
  96:   // The flags.
  97:   private int flags;
  98: 
  99:   // All instance fields following this point are initialized
 100:   // during analysis.  Fields before this must be set by the constructor.
 101: 
 102:   // The initial embedding level.
 103:   private int baseEmbedding;
 104:   // The type of each character in the text.
 105:   private byte[] types;
 106:   // The levels we compute.
 107:   private byte[] levels;
 108: 
 109:   // A list of indices where a formatting code was found.  These
 110:   // are indicies into the original text -- not into the text after
 111:   // the codes have been removed.
 112:   private ArrayList formatterIndices;
 113: 
 114:   // Indices of the starts of runs in the text. 
 115:   private int[] runs;
 116: 
 117:   // A convenience field where we keep track of what kinds of runs
 118:   // we've seen.
 119:   private int resultFlags;
 120: 
 121:   /**
 122:    * Create a new Bidi object given an attributed character iterator.
 123:    * This constructor will examine various attributes of the text:
 124:    * <ul>
 125:    * <li> {@link TextAttribute#RUN_DIRECTION} is used to determine the
 126:    * paragraph's base embedding level.  This constructor will recognize
 127:    * either {@link TextAttribute#RUN_DIRECTION_LTR} or
 128:    * {@link TextAttribute#RUN_DIRECTION_RTL}.  If neither is given,
 129:    * {@link #DIRECTION_DEFAULT_LEFT_TO_RIGHT} is assumed.
 130:    * </li>
 131:    * 
 132:    * <li> If {@link TextAttribute#NUMERIC_SHAPING} is seen, then numeric
 133:    * shaping will be done before the Bidi algorithm is run.
 134:    * </li>
 135:    * 
 136:    * <li> If {@link TextAttribute#BIDI_EMBEDDING} is seen on a given
 137:    * character, then the value of this attribute will be used as an
 138:    * embedding level override.
 139:    * </li>
 140:    * </ul>
 141:    * @param iter the attributed character iterator to use
 142:    */
 143:   public Bidi(AttributedCharacterIterator iter)
 144:   {
 145:     // If set, this attribute should be set on all characters.
 146:     // We don't check this (should we?) but we do assume that we
 147:     // can simply examine the first character.
 148:     Object val = iter.getAttribute(TextAttribute.RUN_DIRECTION);
 149:     if (val == TextAttribute.RUN_DIRECTION_LTR)
 150:       this.flags = DIRECTION_LEFT_TO_RIGHT;
 151:     else if (val == TextAttribute.RUN_DIRECTION_RTL)
 152:       this.flags = DIRECTION_RIGHT_TO_LEFT;
 153:     else
 154:       this.flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
 155: 
 156:     // Likewise this attribute should be specified on the whole text.
 157:     // We read it here and then, if it is set, we apply the numeric shaper
 158:     // to the text before processing it.
 159:     NumericShaper shaper = null;
 160:     val = iter.getAttribute(TextAttribute.NUMERIC_SHAPING);
 161:     if (val instanceof NumericShaper)
 162:       shaper = (NumericShaper) val;
 163: 
 164:     char[] text = new char[iter.getEndIndex() - iter.getBeginIndex()];
 165:     this.embeddings = new byte[this.text.length];
 166:     this.embeddingOffset = 0;
 167:     this.length = text.length;
 168:     for (int i = 0; i < this.text.length; ++i)
 169:       {
 170:         this.text[i] = iter.current();
 171: 
 172:         val = iter.getAttribute(TextAttribute.BIDI_EMBEDDING);
 173:         if (val instanceof Integer)
 174:           {
 175:             int ival = ((Integer) val).intValue();
 176:             byte bval;
 177:             if (ival < -62 || ival > 62)
 178:               bval = 0;
 179:             else
 180:               bval = (byte) ival;
 181:             this.embeddings[i] = bval;
 182:           }
 183:       }
 184: 
 185:     // Invoke the numeric shaper, if specified.
 186:     if (shaper != null)
 187:       shaper.shape(this.text, 0, this.length);
 188: 
 189:     runBidi();
 190:   }
 191: 
 192:   /**
 193:    * Create a new Bidi object with the indicated text and, possibly, explicit
 194:    * embedding settings.
 195:    * 
 196:    * If the embeddings array is null, it is ignored.  Otherwise it is taken to
 197:    * be explicit embedding settings corresponding to the text.  Positive values
 198:    * from 1 to 61 are embedding levels, and negative values from -1 to -61 are
 199:    * embedding overrides.  (FIXME: not at all clear what this really means.)
 200:    * 
 201:    * @param text the text to use
 202:    * @param offset the offset of the first character of the text
 203:    * @param embeddings the explicit embeddings, or null if there are none
 204:    * @param embedOffset the offset of the first embedding value to use
 205:    * @param length the length of both the text and the embeddings
 206:    * @param flags a flag indicating the base embedding direction
 207:    */
 208:   public Bidi(char[] text, int offset, byte[] embeddings, int embedOffset,
 209:               int length, int flags)
 210:   {
 211:     if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
 212:         && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
 213:         && flags != DIRECTION_LEFT_TO_RIGHT
 214:         && flags != DIRECTION_RIGHT_TO_LEFT)
 215:       throw new IllegalArgumentException("unrecognized 'flags' argument: "
 216:                                          + flags);
 217:     this.text = text;
 218:     this.textOffset = offset;
 219:     this.embeddings = embeddings;
 220:     this.embeddingOffset = embedOffset;
 221:     this.length = length;
 222:     this.flags = flags;
 223: 
 224:     runBidi();
 225:   }
 226: 
 227:   /**
 228:    * Create a new Bidi object using the contents of the given String
 229:    * as the text.
 230:    * @param text the text to use
 231:    * @param flags a flag indicating the base embedding direction
 232:    */
 233:   public Bidi(String text, int flags)
 234:   {
 235:     if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
 236:         && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
 237:         && flags != DIRECTION_LEFT_TO_RIGHT
 238:         && flags != DIRECTION_RIGHT_TO_LEFT)
 239:       throw new IllegalArgumentException("unrecognized 'flags' argument: "
 240:                                          + flags);
 241: 
 242:     // This is inefficient, but it isn't clear whether it matters.
 243:     // If it does we can change our implementation a bit to allow either
 244:     // a String or a char[].
 245:     this.text = text.toCharArray();
 246:     this.textOffset = 0;
 247:     this.embeddings = null;
 248:     this.embeddingOffset = 0;
 249:     this.length = text.length();
 250:     this.flags = flags;
 251: 
 252:     runBidi();
 253:   }
 254: 
 255:   /**
 256:    * Implementation function which computes the initial type of
 257:    * each character in the input.
 258:    */
 259:   private void computeTypes()
 260:   {
 261:     types = new byte[length];
 262:     for (int i = 0; i < length; ++i)
 263:       types[i] = Character.getDirectionality(text[textOffset + i]);
 264:   }
 265: 
 266:   /**
 267:    * An internal function which implements rules P2 and P3.
 268:    * This computes the base embedding level.
 269:    * @return the paragraph's base embedding level
 270:    */
 271:   private int computeParagraphEmbeddingLevel()
 272:   {
 273:     // First check to see if the user supplied a directionality override.
 274:     if (flags == DIRECTION_LEFT_TO_RIGHT
 275:         || flags == DIRECTION_RIGHT_TO_LEFT)
 276:       return flags;
 277: 
 278:     // This implements rules P2 and P3.
 279:     // (Note that we don't need P1, as the user supplies
 280:     // a paragraph.)
 281:     for (int i = 0; i < length; ++i)
 282:       {
 283:         int dir = types[i];
 284:         if (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT)
 285:           return DIRECTION_LEFT_TO_RIGHT;
 286:         if (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT
 287:             || dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
 288:           return DIRECTION_RIGHT_TO_LEFT;
 289:       }
 290:     return (flags == DIRECTION_DEFAULT_LEFT_TO_RIGHT
 291:             ? DIRECTION_LEFT_TO_RIGHT
 292:             : DIRECTION_RIGHT_TO_LEFT);
 293:   }
 294: 
 295:   /**
 296:    * An internal function which implements rules X1 through X9.
 297:    * This computes the initial levels for the text, handling
 298:    * explicit overrides and embeddings.
 299:    */
 300:   private void computeExplicitLevels()
 301:   {
 302:     levels = new byte[length];
 303:     byte currentEmbedding = (byte) baseEmbedding;
 304:     // The directional override is a Character directionality
 305:     // constant.  -1 means there is no override.
 306:     byte directionalOverride = -1;
 307:     // The stack of pushed embeddings, and the stack pointer.
 308:     // Note that because the direction is inherent in the depth,
 309:     // and because we have a bit left over in a byte, we can encode
 310:     // the override, if any, directly in this value on the stack.
 311:     final int MAX_DEPTH = 62;
 312:     byte[] embeddingStack = new byte[MAX_DEPTH];
 313:     int sp = 0;
 314: 
 315:     for (int i = 0; i < length; ++i)
 316:       {
 317:         // If we see an explicit embedding, we use that, even if
 318:         // the current character is itself a directional override.
 319:         if (embeddings != null && embeddings[embeddingOffset + i] != 0)
 320:           {
 321:             // It isn't at all clear what we're supposed to do here.
 322:             // What does a negative value really mean?
 323:             // Should we push on the embedding stack here?
 324:             currentEmbedding = embeddings[embeddingOffset + i]; 
 325:             if (currentEmbedding < 0)
 326:               {
 327:                 currentEmbedding = (byte) -currentEmbedding;
 328:                 directionalOverride
 329:                   = (((currentEmbedding % 2) == 0)
 330:                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
 331:                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
 332:               }
 333:             else
 334:               directionalOverride = -1;
 335:             continue;
 336:           }
 337:         // No explicit embedding.
 338:         boolean isLtoR = false;
 339:         boolean isSpecial = true;
 340:         switch (types[i])
 341:           {
 342:             case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
 343:             case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
 344:               isLtoR = true;
 345:               // Fall through.
 346:             case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
 347:             case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
 348:               {
 349:                 byte newEmbedding;
 350:                 if (isLtoR)
 351:                   {
 352:                     // Least greater even.
 353:                     newEmbedding = (byte) ((currentEmbedding & ~1) + 2);
 354:                   }
 355:                 else
 356:                   {
 357:                     // Least greater odd.
 358:                     newEmbedding = (byte) ((currentEmbedding + 1) | 1);
 359:                   }
 360:                 // FIXME: we don't properly handle invalid pushes.
 361:                 if (newEmbedding < MAX_DEPTH)
 362:                   {
 363:                     // The new level is valid.  Push the old value.
 364:                     // See above for a comment on the encoding here.
 365:                     if (directionalOverride != -1)
 366:                       currentEmbedding |= Byte.MIN_VALUE;
 367:                     embeddingStack[sp++] = currentEmbedding;
 368:                     currentEmbedding = newEmbedding;
 369:                     if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE)
 370:                       directionalOverride = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
 371:                     else if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE)
 372:                       directionalOverride = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
 373:                     else
 374:                       directionalOverride = -1;
 375:                   }
 376:                 }
 377:               break;
 378:             case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
 379:               {
 380:                 // FIXME: we don't properly handle a pop with a corresponding
 381:                 // invalid push.
 382:                 if (sp == 0)
 383:                   {
 384:                     // We saw a pop without a push.  Just ignore it.
 385:                     break;
 386:                   }
 387:                 byte newEmbedding = embeddingStack[--sp];
 388:                 currentEmbedding = (byte) (newEmbedding & 0x7f);
 389:                 if (newEmbedding < 0)
 390:                   directionalOverride
 391:                   = (((newEmbedding & 1) == 0)
 392:                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
 393:                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
 394:                 else
 395:                   directionalOverride = -1;
 396:               }
 397:               break;
 398:             default:
 399:               isSpecial = false;
 400:               break;
 401:           }
 402:         levels[i] = currentEmbedding;
 403:         if (isSpecial)
 404:           {
 405:             // Mark this character for removal.
 406:             if (formatterIndices == null)
 407:               formatterIndices = new ArrayList();
 408:             formatterIndices.add(Integer.valueOf(i));
 409:           }
 410:         else if (directionalOverride != -1)
 411:           types[i] = directionalOverride;
 412:       }
 413: 
 414:     // Remove the formatting codes and update both the arrays
 415:     // and 'length'.  It would be more efficient not to remove
 416:     // these codes, but it is also more complicated.  Also, the
 417:     // Unicode algorithm reference does not properly describe
 418:     // how this is to be done -- from what I can tell, their suggestions
 419:     // in this area will not yield the correct results.
 420:     if (formatterIndices == null)
 421:       return;
 422:     int output = 0, input = 0;
 423:     final int size = formatterIndices.size();
 424:     for (int i = 0; i <= size; ++i)
 425:       {
 426:         int nextFmt;
 427:         if (i == size)
 428:           nextFmt = length;
 429:         else
 430:           nextFmt = ((Integer) formatterIndices.get(i)).intValue();
 431:         // Non-formatter codes are from 'input' to 'nextFmt'.
 432:         int len = nextFmt - input;
 433:         System.arraycopy(levels, input, levels, output, len);
 434:         System.arraycopy(types, input, types, output, len);
 435:         output += len;
 436:         input = nextFmt + 1;
 437:       }
 438:     length -= formatterIndices.size();
 439:   }
 440: 
 441:   /**
 442:    * An internal function to compute the boundaries of runs
 443:    * in the text.  It isn't strictly necessary to do this, but
 444:    * it lets us write some following passes in a less complicated
 445:    * way.  Also it lets us efficiently implement some of the public
 446:    * methods.  A run is simply a sequence of characters at the
 447:    * same level.
 448:    */
 449:   private void computeRuns()
 450:   {
 451:     int runCount = 0;
 452:     int currentEmbedding = baseEmbedding;
 453:     for (int i = 0; i < length; ++i)
 454:       {
 455:         if (levels[i] != currentEmbedding)
 456:           {
 457:             currentEmbedding = levels[i];
 458:             ++runCount;
 459:           }
 460:       }
 461: 
 462:     // This may be called multiple times.  If so, and if
 463:     // the number of runs has not changed, then don't bother
 464:     // allocating a new array.
 465:     if (runs == null || runs.length != runCount + 1)
 466:       runs = new int[runCount + 1];
 467:     int where = 0;
 468:     int lastRunStart = 0;
 469:     currentEmbedding = baseEmbedding;
 470:     for (int i = 0; i < length; ++i)
 471:       {
 472:         if (levels[i] != currentEmbedding)
 473:           {
 474:             runs[where++] = lastRunStart;
 475:             lastRunStart = i;
 476:             currentEmbedding = levels[i];
 477:           }
 478:       }
 479:     runs[where++] = lastRunStart;
 480:   }
 481: 
 482:   /**
 483:    * An internal method to resolve weak types.  This implements
 484:    * rules W1 through W7.
 485:    */
 486:   private void resolveWeakTypes()
 487:   {
 488:     final int runCount = getRunCount();
 489:     
 490:     int previousLevel = baseEmbedding;
 491:     for (int run = 0; run < runCount; ++run)
 492:       {
 493:         int start = getRunStart(run);
 494:         int end = getRunLimit(run);
 495:         int level = getRunLevel(run);
 496: 
 497:         // These are the names used in the Bidi algorithm.
 498:         byte sor = (((Math.max(previousLevel, level) % 2) == 0)
 499:                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
 500:                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
 501:         int nextLevel;
 502:         if (run == runCount - 1)
 503:           nextLevel = baseEmbedding;
 504:         else
 505:           nextLevel = getRunLevel(run + 1);
 506:         byte eor = (((Math.max(level, nextLevel) % 2) == 0)
 507:                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
 508:                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
 509: 
 510:         byte prevType = sor;
 511:         byte prevStrongType = sor;
 512:         for (int i = start; i < end; ++i)
 513:           {
 514:             final byte nextType = (i == end - 1) ? eor : types[i + 1];
 515: 
 516:             // Rule W1: change NSM to the prevailing direction.
 517:             if (types[i] == Character.DIRECTIONALITY_NONSPACING_MARK)
 518:               types[i] = prevType;
 519:             else
 520:               prevType = types[i];
 521: 
 522:             // Rule W2: change EN to AN in some cases.
 523:             if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
 524:               {
 525:                 if (prevStrongType == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
 526:                   types[i] = Character.DIRECTIONALITY_ARABIC_NUMBER;
 527:               }
 528:             else if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
 529:                      || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT
 530:                      || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
 531:               prevStrongType = types[i];
 532: 
 533:             // Rule W3: change AL to R.
 534:             if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
 535:               types[i] = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
 536: 
 537:             // Rule W4: handle separators between two numbers.
 538:             if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER
 539:                 && nextType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
 540:               {
 541:                 if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
 542:                     || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
 543:                   types[i] = nextType;
 544:               }
 545:             else if (prevType == Character.DIRECTIONALITY_ARABIC_NUMBER
 546:                      && nextType == Character.DIRECTIONALITY_ARABIC_NUMBER
 547:                      && types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
 548:               types[i] = nextType;
 549: 
 550:             // Rule W5: change a sequence of european terminators to
 551:             // european numbers, if they are adjacent to european numbers.
 552:             // We also include BN characters in this.
 553:             if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
 554:                 || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
 555:               {
 556:                 if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
 557:                   types[i] = prevType;
 558:                 else
 559:                   {
 560:                     // Look ahead to see if there is an EN terminating this
 561:                     // sequence of ETs.
 562:                     int j = i + 1;
 563:                     while (j < end
 564:                            && (types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
 565:                                || types[j] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL))
 566:                       ++j;
 567:                     if (j < end
 568:                         && types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
 569:                       {
 570:                         // Change them all to EN now.
 571:                         for (int k = i; k < j; ++k)
 572:                           types[k] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
 573:                       }
 574:                   }
 575:               }
 576: 
 577:             // Rule W6: separators and terminators change to ON.
 578:             // Again we include BN.
 579:             if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
 580:                 || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
 581:                 || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
 582:                 || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
 583:               types[i] = Character.DIRECTIONALITY_OTHER_NEUTRALS;
 584: 
 585:             // Rule W7: change european number types.
 586:             if (prevStrongType == Character.DIRECTIONALITY_LEFT_TO_RIGHT
 587:                 && types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
 588:               types[i] = prevStrongType;
 589:           }
 590: 
 591:         previousLevel = level;
 592:       }
 593:   }
 594: 
 595:   /**
 596:    * An internal method to resolve neutral types.  This implements
 597:    * rules N1 and N2.
 598:    */
 599:   private void resolveNeutralTypes()
 600:   {
 601:     // This implements rules N1 and N2.
 602:     final int runCount = getRunCount();
 603:     
 604:     int previousLevel = baseEmbedding;
 605:     for (int run = 0; run < runCount; ++run)
 606:       {
 607:         int start = getRunStart(run);
 608:         int end = getRunLimit(run);
 609:         int level = getRunLevel(run);
 610: 
 611:         byte embeddingDirection
 612:           = (((level % 2) == 0) ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
 613:                                 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
 614:         // These are the names used in the Bidi algorithm.
 615:         byte sor = (((Math.max(previousLevel, level) % 2) == 0)
 616:                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
 617:                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
 618:         int nextLevel;
 619:         if (run == runCount - 1)
 620:           nextLevel = baseEmbedding;
 621:         else
 622:           nextLevel = getRunLevel(run + 1);
 623:         byte eor = (((Math.max(level, nextLevel) % 2) == 0)
 624:                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
 625:                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
 626: 
 627:         byte prevStrong = sor;
 628:         int neutralStart = -1;
 629:         for (int i = start; i <= end; ++i)
 630:           {
 631:             byte newStrong = -1;
 632:             byte thisType = i == end ? eor : types[i];
 633:             switch (thisType)
 634:               {
 635:               case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
 636:                 newStrong = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
 637:                 break;
 638:               case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
 639:               case Character.DIRECTIONALITY_ARABIC_NUMBER:
 640:               case Character.DIRECTIONALITY_EUROPEAN_NUMBER:
 641:                 newStrong = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
 642:                 break;
 643:               case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL:
 644:               case Character.DIRECTIONALITY_OTHER_NEUTRALS:
 645:               case Character.DIRECTIONALITY_SEGMENT_SEPARATOR:
 646:               case Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR:
 647:           case Character.DIRECTIONALITY_WHITESPACE:
 648:                 if (neutralStart == -1)
 649:                   neutralStart = i;
 650:                 break;
 651:               }
 652:             // If we see a strong character, update all the neutrals.
 653:             if (newStrong != -1)
 654:               {
 655:                 if (neutralStart != -1)
 656:                   {
 657:                     byte override = (prevStrong == newStrong
 658:                                      ? prevStrong
 659:                                      : embeddingDirection);
 660:                     for (int j = neutralStart; j < i; ++j)
 661:                       types[j] = override;
 662:                   }
 663:                 prevStrong = newStrong;
 664:                 neutralStart = -1;
 665:               }
 666:           }
 667: 
 668:         previousLevel = level;
 669:       }
 670:   }
 671: 
 672:   /**
 673:    * An internal method to resolve implicit levels.
 674:    * This implements rules I1 and I2.
 675:    */
 676:   private void resolveImplicitLevels()
 677:   {
 678:     // This implements rules I1 and I2.
 679:     for (int i = 0; i < length; ++i)
 680:       {
 681:         if ((levels[i] & 1) == 0)
 682:           {
 683:             if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
 684:               ++levels[i];
 685:             else if (types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
 686:                      || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
 687:               levels[i] += 2;
 688:           }
 689:         else
 690:           {
 691:             if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
 692:                 || types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
 693:                 || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
 694:               ++levels[i];
 695:           }
 696: 
 697:         // Update the result flags.
 698:         resultFlags |= 1 << (levels[i] & 1);
 699:       }
 700:     // One final update of the result flags, using the base level.
 701:     resultFlags |= 1 << baseEmbedding;
 702:   }
 703: 
 704:   /**
 705:    * This reinserts the formatting codes that we removed early on.
 706:    * Actually it does not insert formatting codes per se, but rather
 707:    * simply inserts new levels at the appropriate locations in the
 708:    * 'levels' array.
 709:    */
 710:   private void reinsertFormattingCodes()
 711:   {
 712:     if (formatterIndices == null)
 713:       return;
 714:     int input = length;
 715:     int output = levels.length;
 716:     // Process from the end as we are copying the array over itself here.
 717:     for (int index = formatterIndices.size() - 1; index >= 0; --index)
 718:       {
 719:         int nextFmt = ((Integer) formatterIndices.get(index)).intValue();
 720: 
 721:         // nextFmt points to a location in the original array.  So,
 722:         // nextFmt+1 is the target of our copying.  output is the location
 723:         // to which we last copied, thus we can derive the length of the
 724:         // copy from it.
 725:         int len = output - nextFmt - 1;
 726:         output = nextFmt;
 727:         input -= len;
 728:         // Note that we no longer need 'types' at this point, so we
 729:         // only edit 'levels'.
 730:         if (nextFmt + 1 < levels.length)
 731:           System.arraycopy(levels, input, levels, nextFmt + 1, len);
 732: 
 733:         // Now set the level at the reinsertion point.
 734:         int rightLevel;
 735:         if (output == levels.length - 1)
 736:           rightLevel = baseEmbedding;
 737:         else
 738:           rightLevel = levels[output + 1];
 739:         int leftLevel;
 740:         if (input == 0)
 741:           leftLevel = baseEmbedding;
 742:         else
 743:           leftLevel = levels[input];
 744:         levels[output] = (byte) Math.max(leftLevel, rightLevel);
 745:       }
 746:     length = levels.length;
 747:   }
 748: 
 749:   /**
 750:    * This is the main internal entry point.  After a constructor
 751:    * has initialized the appropriate local state, it will call
 752:    * this method to do all the work.
 753:    */
 754:   private void runBidi()
 755:   {
 756:     computeTypes();
 757:     baseEmbedding = computeParagraphEmbeddingLevel();
 758:     computeExplicitLevels();
 759:     computeRuns();
 760:     resolveWeakTypes();
 761:     resolveNeutralTypes();
 762:     resolveImplicitLevels();
 763:     // We're done with the types.  Let the GC clean up.
 764:     types = null;
 765:     reinsertFormattingCodes();
 766:     // After resolving the implicit levels, the number
 767:     // of runs may have changed.
 768:     computeRuns();
 769:   }
 770: 
 771:   /**
 772:    * Return true if the paragraph base embedding is left-to-right,
 773:    * false otherwise.
 774:    */
 775:   public boolean baseIsLeftToRight()
 776:   {
 777:     return baseEmbedding == DIRECTION_LEFT_TO_RIGHT;
 778:   }
 779: 
 780:   /**
 781:    * Create a new Bidi object for a single line of text, taken
 782:    * from the text used when creating the current Bidi object.
 783:    * @param start the index of the first character of the line
 784:    * @param end the index of the final character of the line
 785:    * @return a new Bidi object for the indicated line of text
 786:    */
 787:   public Bidi createLineBidi(int start, int end)
 788:   {
 789:     // This isn't the most efficient implementation possible.
 790:     // This probably does not matter, so we choose simplicity instead.
 791:     int level = getLevelAt(start);
 792:     int flag = (((level % 2) == 0)
 793:                 ? DIRECTION_LEFT_TO_RIGHT
 794:                 : DIRECTION_RIGHT_TO_LEFT);
 795:     return new Bidi(text, textOffset + start,
 796:                     embeddings, embeddingOffset + start,
 797:                     end - start, flag);
 798:   }
 799: 
 800:   /**
 801:    * Return the base embedding level of the paragraph.
 802:    */
 803:   public int getBaseLevel()
 804:   {
 805:     return baseEmbedding;
 806:   }
 807: 
 808:   /**
 809:    * Return the length of the paragraph, in characters.
 810:    */
 811:   public int getLength()
 812:   {
 813:     return length;
 814:   }
 815: 
 816:   /**
 817:    * Return the level at the indicated character.  If the
 818:    * supplied index is less than zero or greater than the length
 819:    * of the text, then the paragraph's base embedding level will
 820:    * be returned.
 821:    * @param offset the character to examine
 822:    * @return the level of that character
 823:    */
 824:   public int getLevelAt(int offset)
 825:   {
 826:     if (offset < 0 || offset >= length)
 827:       return getBaseLevel();
 828:     return levels[offset];
 829:   }
 830: 
 831:   /**
 832:    * Return the number of runs in the result.  A run is
 833:    * a sequence of characters at the same embedding level.
 834:    */
 835:   public int getRunCount()
 836:   {
 837:     return runs.length;
 838:   }
 839: 
 840:   /**
 841:    * Return the level of the indicated run.
 842:    * @param which the run to examine
 843:    * @return the level of that run
 844:    */
 845:   public int getRunLevel(int which)
 846:   {
 847:     return levels[runs[which]];
 848:   }
 849: 
 850:   /**
 851:    * Return the index of the character just following the end
 852:    * of the indicated run.
 853:    * @param which the run to examine
 854:    * @return the index of the character after the final character
 855:    * of the run
 856:    */
 857:   public int getRunLimit(int which)
 858:   {
 859:     if (which == runs.length - 1)
 860:       return length;
 861:     return runs[which + 1];
 862:   }
 863: 
 864:   /**
 865:    * Return the index of the first character in the indicated run.
 866:    * @param which the run to examine
 867:    * @return the index of the first character of the run
 868:    */
 869:   public int getRunStart(int which)
 870:   {
 871:     return runs[which];
 872:   }
 873: 
 874:   /**
 875:    * Return true if the text is entirely left-to-right, and the
 876:    * base embedding is also left-to-right.
 877:    */
 878:   public boolean isLeftToRight()
 879:   {
 880:     return resultFlags == LTOR;
 881:   }
 882: 
 883:   /**
 884:    * Return true if the text consists of mixed left-to-right and
 885:    * right-to-left runs, or if the text consists of one kind of run
 886:    * which differs from the base embedding direction.
 887:    */
 888:   public boolean isMixed()
 889:   {
 890:     return resultFlags == (LTOR | RTOL);
 891:   }
 892: 
 893:   /**
 894:    * Return true if the text is entirely right-to-left, and the
 895:    * base embedding is also right-to-left.
 896:    */
 897:   public boolean isRightToLeft()
 898:   {
 899:     return resultFlags == RTOL;
 900:   }
 901: 
 902:   /**
 903:    * Return a String describing the internal state of this object.
 904:    * This is only useful for debugging.
 905:    */
 906:   public String toString()
 907:   {
 908:     return "Bidi Bidi Bidi I like you, Buck!";
 909:   }
 910: 
 911:   /**
 912:    * Reorder objects according to the levels passed in.  This implements
 913:    * reordering as defined by the Unicode bidirectional layout specification.
 914:    * The levels are integers from 0 to 62; even numbers represent left-to-right
 915:    * runs, and odd numbers represent right-to-left runs.
 916:    *
 917:    * @param levels the levels associated with each object
 918:    * @param levelOffset the index of the first level to use
 919:    * @param objs the objects to reorder according to the levels
 920:    * @param objOffset the index of the first object to use
 921:    * @param count the number of objects (and levels) to manipulate
 922:    */
 923:   public static void reorderVisually(byte[] levels, int levelOffset,
 924:                                      Object[] objs, int objOffset, int count)
 925:   {
 926:     // We need a copy of the 'levels' array, as we are going to modify it.
 927:     // This is unfortunate but difficult to avoid.
 928:     byte[] levelCopy = new byte[count];
 929:     // Do this explicitly so we can also find the maximum depth at the
 930:     // same time.
 931:     int max = 0;
 932:     int lowestOdd = 63;
 933:     for (int i = 0; i < count; ++i)
 934:       {
 935:         levelCopy[i] = levels[levelOffset + i];
 936:         max = Math.max(levelCopy[i], max);
 937:         if (levelCopy[i] % 2 != 0)
 938:           lowestOdd = Math.min(lowestOdd, levelCopy[i]);
 939:       }
 940: 
 941:     // Reverse the runs starting with the deepest.
 942:     for (int depth = max; depth >= lowestOdd; --depth)
 943:       {
 944:         int start = 0;
 945:         while (start < count)
 946:           {
 947:             // Find the start of a run >= DEPTH.
 948:             while (start < count && levelCopy[start] < depth)
 949:               ++start;
 950:             if (start == count)
 951:               break;
 952:             // Find the end of the run.
 953:             int end = start + 1;
 954:             while (end < count && levelCopy[end] >= depth)
 955:               ++end;
 956: 
 957:             // Reverse this run.
 958:             for (int i = 0; i < (end - start) / 2; ++i)
 959:               {
 960:                 byte tmpb = levelCopy[end - i - 1];
 961:                 levelCopy[end - i - 1] = levelCopy[start + i];
 962:                 levelCopy[start + i] = tmpb;
 963:                 Object tmpo = objs[objOffset + end - i - 1];
 964:                 objs[objOffset + end - i - 1] = objs[objOffset + start + i];
 965:                 objs[objOffset + start + i] = tmpo;
 966:               }
 967: 
 968:             // Handle the next run.
 969:             start = end + 1;
 970:           }
 971:       }
 972:   }
 973: 
 974:   /**
 975:    * Returns false if all characters in the text between start and end
 976:    * are all left-to-right text. This implementation is just calls
 977:    * <code>Character.getDirectionality(char)</code> on all characters
 978:    * and makes sure all characters are either explicitly left-to-right
 979:    * or neutral in directionality (character types L, EN, ES, ET, AN,
 980:    * CS, S and WS).
 981:    */
 982:   public static boolean requiresBidi(char[] text, int start, int end)
 983:   {
 984:     for (int i = start; i < end; i++)
 985:       {
 986:     byte dir = Character.getDirectionality(text[i]);
 987:     if (dir != Character.DIRECTIONALITY_LEFT_TO_RIGHT
 988:         && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER
 989:         && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
 990:         && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
 991:         && dir != Character.DIRECTIONALITY_ARABIC_NUMBER
 992:         && dir != Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
 993:         && dir != Character.DIRECTIONALITY_SEGMENT_SEPARATOR
 994:         && dir != Character.DIRECTIONALITY_WHITESPACE
 995:             && dir != Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR)
 996:       return true;
 997:       }
 998: 
 999:     return false;
1000:   }
1001: }