Source for java.awt.font.NumericShaper

   1: /* NumericShaper.java
   2:    Copyright (C) 2003 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.awt.font;
  40: 
  41: import java.io.Serializable;
  42: import java.lang.Character.UnicodeBlock;
  43: 
  44: /**
  45:  * This class handles numeric shaping.  A shaper can either be contextual
  46:  * or not.  A non-contextual shaper will always translate ASCII digits
  47:  * in its input into the target Unicode range.  A contextual shaper will
  48:  * change the target Unicode range depending on the characters it has
  49:  * previously processed.
  50:  *
  51:  * @author Michael Koch
  52:  * @author Tom Tromey
  53:  *
  54:  * @since 1.4
  55:  * @specnote This class does not handle LIMBU or OSMANYA.
  56:  * @specnote The JDK does not seem to properly handle ranges without a
  57:  * digit zero, such as TAMIL.  This implementation does.
  58:  */
  59: public final class NumericShaper implements Serializable
  60: {
  61:   private static final long serialVersionUID = -8022764705923730308L;
  62: 
  63:   /** Convenience constant representing all the valid Unicode ranges.  */
  64:   public static final int ALL_RANGES  = 524287;
  65: 
  66:   /**
  67:    * Constant representing the Unicode ARABIC range.  Shaping done
  68:    * using this range will translate to the arabic decimal characters.
  69:    * Use EASTERN_ARABIC if you want to shape to the eastern arabic
  70:    * (also known as the extended arabic) decimal characters.
  71:    */
  72:   public static final int ARABIC  = 2;
  73: 
  74:   /** Constant representing the Unicode BENGALI range.  */
  75:   public static final int BENGALI  = 16;
  76: 
  77:   /** Constant representing the Unicode DEVANAGARI range.  */
  78:   public static final int DEVANAGARI  = 8;
  79: 
  80:   /**
  81:    * Constant representing the Unicode extended arabic range.
  82:    * In Unicode there are two different sets of arabic digits;
  83:    * this selects the extended or eastern set.
  84:    */
  85:   public static final int EASTERN_ARABIC  = 4;
  86: 
  87:   /**
  88:    * Constant representing the Unicode ETHIOPIC range.  Note that
  89:    * there is no digit zero in this range; an ASCII digit zero
  90:    * is left unchanged when shaping to this range.
  91:    */
  92:   public static final int ETHIOPIC  = 65536;
  93: 
  94:   /**
  95:    * Constant representing the Unicode EUROPEAN range.  For
  96:    * contextual shaping purposes, characters in the various
  97:    * extended Latin character blocks are recognized as EUROPEAN.
  98:    */
  99:   public static final int EUROPEAN  = 1;
 100: 
 101:   /** Constant representing the Unicode GUJARATI range.  */
 102:   public static final int GUJARATI  = 64;
 103: 
 104:   /** Constant representing the Unicode GURMUKHI range.  */
 105:   public static final int GURMUKHI  = 32;
 106: 
 107:   /** Constant representing the Unicode KANNADA range.  */
 108:   public static final int KANNADA  = 1024;
 109: 
 110:   /** Constant representing the Unicode KHMER range.  */
 111:   public static final int KHMER  = 131072;
 112: 
 113:   /** Constant representing the Unicode LAO range.  */
 114:   public static final int LAO  = 8192;
 115: 
 116:   /** Constant representing the Unicode MALAYALAM range.  */
 117:   public static final int MALAYALAM  = 2048;
 118: 
 119:   /** Constant representing the Unicode MONGOLIAN range.  */
 120:   public static final int MONGOLIAN  = 262144;
 121: 
 122:   /** Constant representing the Unicode MYANMAR range.  */
 123:   public static final int MYANMAR  = 32768;
 124: 
 125:   /** Constant representing the Unicode ORIYA range.  */
 126:   public static final int ORIYA  = 128;
 127: 
 128:   /**
 129:    * Constant representing the Unicode TAMIL range.  Note that
 130:    * there is no digit zero in this range; an ASCII digit zero
 131:    * is left unchanged when shaping to this range.
 132:    */
 133:   public static final int TAMIL  = 256;
 134: 
 135:   /** Constant representing the Unicode TELUGU range.  */
 136:   public static final int TELUGU  = 512;
 137: 
 138:   /** Constant representing the Unicode THAI range.  */
 139:   public static final int THAI  = 4096;
 140: 
 141:   /** Constant representing the Unicode TIBETAN range.  */
 142:   public static final int TIBETAN  = 16384;
 143: 
 144:   /**
 145:    * This table holds the zero digits for each language.  This is hard-coded
 146:    * because the values will not change and the table layout is tied to the
 147:    * other constants in this class in any case.  In the two places where a
 148:    * language does not have a zero digit, the character immediately preceeding
 149:    * the one digit is used instead.  These languages are special-cased in
 150:    * the shaping code.
 151:    */
 152:   private static final char[] zeroDigits =
 153:   {
 154:     '0',      // EUROPEAN
 155:     '\u0660', // ARABIC
 156:     '\u06f0', // EASTERN_ARABIC
 157:     '\u0966', // DEVANAGARI
 158:     '\u09e6', // BENGALI
 159:     '\u0a66', // GURMUKHI
 160:     '\u0ae6', // GUJARATI
 161:     '\u0b66', // ORIYA
 162:     '\u0be6', // TAMIL - special case as there is no digit zero
 163:     '\u0c66', // TELUGU
 164:     '\u0ce6', // KANNADA
 165:     '\u0d66', // MALAYALAM
 166:     '\u0e50', // THAI
 167:     '\u0ed0', // LAO
 168:     '\u0f20', // TIBETAN
 169:     '\u1040', // MYANMAR
 170:     '\u1368', // ETHIOPIC - special case as there is no digit zero
 171:     '\u17e0', // KHMER
 172:     '\u1810'  // MONGOLIAN
 173:   };
 174: 
 175:   /**
 176:    * The default initial context for this shaper, specified as
 177:    * an integer from 0 to 18.
 178:    */
 179:   private int key;
 180: 
 181:   /**
 182:    * The target ranges handled by this shaper.  If the shaper
 183:    * is not contextual, the high bit of this field will be set.
 184:    * @specnote This was discovered by reading the serialization spec
 185:    */
 186:   private int mask;
 187: 
 188:   /**
 189:    * Create a new numeric shaper.  The key given is a constant from
 190:    * this class, the constructor turns it into its internal form.
 191:    * @param key the key to use, as one of the manifest constants
 192:    * @param mask a mask of languages to shape for
 193:    */
 194:   private NumericShaper (int key, int mask)
 195:   {
 196:     // This internal form is a bit goofy, but it is specified by
 197:     // the serialization spec.
 198:     this.key = Integer.numberOfTrailingZeros(key);
 199:     this.mask = mask;
 200:   }
 201: 
 202:   /**
 203:    * Return an integer representing all the languages for which this
 204:    * shaper will shape.  The result is taken by "or"ing together
 205:    * the constants representing the various languages.
 206:    */
 207:   public int getRanges ()
 208:   {
 209:     return mask & ALL_RANGES;
 210:   }
 211: 
 212:   /**
 213:    * Return true if this shaper is contextual, false if it is not.
 214:    */
 215:   public boolean isContextual ()
 216:   {
 217:     return mask > 0;
 218:   }
 219: 
 220:   /**
 221:    * Shape the text in the given array.  The starting context will
 222:    * be the context passed to the shaper at creation time.
 223:    * @param text the text to shape
 224:    * @param start the index of the starting character of the array
 225:    * @param count the number of characters in the array
 226:    */
 227:   public void shape (char[] text, int start, int count)
 228:   {
 229:     shape (text, start, count, 1 << key);
 230:   }
 231: 
 232:   /**
 233:    * Given a unicode block object, return corresponding language constant.
 234:    * If the block is not recognized, returns zero.  Note that as there
 235:    * is no separate ARABIC block in Character, this case must
 236:    * be specially handled by the caller; EASTERN_ARABIC is preferred when
 237:    * both are specified.
 238:    * @param b the unicode block to classify
 239:    * @return the language constant, or zero if not recognized
 240:    */
 241:   private int classify(UnicodeBlock b)
 242:   {
 243:     if (b == null)
 244:       return 0;
 245:     // ARABIC is handled by the caller; from testing we know
 246:     // that EASTERN_ARABIC takes precedence.
 247:     if (b == UnicodeBlock.ARABIC)
 248:       return EASTERN_ARABIC;
 249:     if (b == UnicodeBlock.BENGALI)
 250:       return BENGALI;
 251:     if (b == UnicodeBlock.DEVANAGARI)
 252:       return DEVANAGARI;
 253:     if (b == UnicodeBlock.ETHIOPIC)
 254:       return ETHIOPIC;
 255:     if (b == UnicodeBlock.BASIC_LATIN
 256:         || b == UnicodeBlock.LATIN_1_SUPPLEMENT
 257:         || b == UnicodeBlock.LATIN_EXTENDED_A
 258:         || b == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
 259:         || b == UnicodeBlock.LATIN_EXTENDED_B)
 260:       return EUROPEAN;
 261:     if (b == UnicodeBlock.GUJARATI)
 262:       return GUJARATI;
 263:     if (b == UnicodeBlock.GURMUKHI)
 264:       return GURMUKHI;
 265:     if (b == UnicodeBlock.KANNADA)
 266:       return KANNADA;
 267:     if (b == UnicodeBlock.KHMER)
 268:       return KHMER;
 269:     if (b == UnicodeBlock.LAO)
 270:       return LAO;
 271:     if (b == UnicodeBlock.MALAYALAM)
 272:       return MALAYALAM;
 273:     if (b == UnicodeBlock.MONGOLIAN)
 274:       return MONGOLIAN;
 275:     if (b == UnicodeBlock.MYANMAR)
 276:       return MYANMAR;
 277:     if (b == UnicodeBlock.ORIYA)
 278:       return ORIYA;
 279:     if (b == UnicodeBlock.TAMIL)
 280:       return TAMIL;
 281:     if (b == UnicodeBlock.TELUGU)
 282:       return TELUGU;
 283:     if (b == UnicodeBlock.THAI)
 284:       return THAI;
 285:     if (b == UnicodeBlock.TIBETAN)
 286:       return TIBETAN;
 287:     return 0;
 288:   }
 289: 
 290:   /**
 291:    * Shape the given text, using the indicated initial context.
 292:    * If this shaper is not a contextual shaper, then the given context
 293:    * will be ignored.
 294:    * @param text the text to shape
 295:    * @param start the index of the first character of the text to shape
 296:    * @param count the number of characters to shape in the text
 297:    * @param context the initial context
 298:    * @throws IllegalArgumentException if the initial context is invalid
 299:    */
 300:   public void shape (char[] text, int start, int count, int context)
 301:   {
 302:     int currentContext;
 303:     if (isContextual())
 304:       {
 305:         if (Integer.bitCount(context) != 1 || (context & ~ALL_RANGES) != 0)
 306:           throw new IllegalArgumentException("invalid context argument");
 307:         // If the indicated context is not one we are handling, reset it.
 308:         if ((context & mask) == 0)
 309:           currentContext = -1;
 310:         else
 311:           currentContext = Integer.numberOfTrailingZeros(context);
 312:       }
 313:     else
 314:       currentContext = key;
 315: 
 316:     for (int i = 0; i < count; ++i)
 317:       {
 318:         char c = text[start + i];
 319:         if (c >= '0' && c <= '9')
 320:           {
 321:             if (currentContext >= 0)
 322:               {
 323:                 // Shape into the current context.
 324:                 if (c == '0'
 325:                   && ((1 << currentContext) == TAMIL
 326:                       || (1 << currentContext) == ETHIOPIC))
 327:                   {
 328:                     // No digit 0 in this context; do nothing.
 329:                   }
 330:                 else
 331:                   text[start + i]
 332:                     = (char) (zeroDigits[currentContext] + c - '0');
 333:               }
 334:           }
 335:         else if (isContextual())
 336:           {
 337:             // if c is in a group, set currentContext; else reset it.
 338:             int group = classify(UnicodeBlock.of(c));
 339:             // Specially handle ARABIC.
 340:             if (group == EASTERN_ARABIC && (mask & EASTERN_ARABIC) == 0
 341:                 && (mask & ARABIC) != 0)
 342:               group = ARABIC;
 343:             if ((mask & group) != 0)
 344:               {
 345:                 // The character was classified as being in a group
 346:                 // we recognize, and it was selected by the shaper.
 347:                 // So, change the context.
 348:                 currentContext = Integer.numberOfTrailingZeros(group);
 349:               }
 350:           }
 351:       }
 352:   }
 353: 
 354:   public boolean equals (Object obj)
 355:   {
 356:     if (! (obj instanceof NumericShaper))
 357:       return false;
 358:     NumericShaper tmp = (NumericShaper) obj;
 359:     return key == tmp.key && mask == tmp.mask;
 360:   }
 361: 
 362:   public int hashCode ()
 363:   {
 364:     return key ^ mask;
 365:   }
 366: 
 367:   public String toString ()
 368:   {
 369:     // For debugging only.
 370:     return "key=" + key + "; mask=" + mask;
 371:   }
 372: 
 373:   /**
 374:    * Return a non-contextual shaper which can shape to a single range.
 375:    * All ASCII digits in the input text are translated to this language.
 376:    * @param singleRange the target language
 377:    * @return a non-contextual shaper for this language
 378:    * @throws IllegalArgumentException if the argument does not name a
 379:    * single language, as specified by the constants declared in this class
 380:    */
 381:   public static NumericShaper getShaper (int singleRange)
 382:   {
 383:     if (Integer.bitCount(singleRange) != 1)
 384:       throw new IllegalArgumentException("more than one bit set in argument");
 385:     if ((singleRange & ~ALL_RANGES) != 0)
 386:       throw new IllegalArgumentException("argument out of range");
 387:     return new NumericShaper(singleRange, Integer.MIN_VALUE | singleRange);
 388:   }
 389: 
 390:   /**
 391:    * Return a contextual shaper which can shape to any of the indicated
 392:    * languages.  The default initial context for this shaper is EUROPEAN.
 393:    * @param ranges the ranges to shape to
 394:    * @return a contextual shaper which will target any of these ranges
 395:    * @throws IllegalArgumentException if the argument specifies an
 396:    * unrecognized range
 397:    */
 398:   public static NumericShaper getContextualShaper (int ranges)
 399:   {
 400:     if ((ranges & ~ALL_RANGES) != 0)
 401:       throw new IllegalArgumentException("argument out of range");
 402:     return new NumericShaper(EUROPEAN, ranges);
 403:   }
 404: 
 405:   /**
 406:    * Return a contextual shaper which can shape to any of the indicated
 407:    * languages.  The default initial context for this shaper is given as
 408:    * an argument.
 409:    * @param ranges the ranges to shape to
 410:    * @param defaultContext the default initial context
 411:    * @return a contextual shaper which will target any of these ranges
 412:    * @throws IllegalArgumentException if the ranges argument specifies an
 413:    * unrecognized range, or if the defaultContext argument does not specify
 414:    * a single valid range
 415:    */
 416:   public static NumericShaper getContextualShaper (int ranges,
 417:                                                    int defaultContext)
 418:   {
 419:     if (Integer.bitCount(defaultContext) != 1)
 420:       throw new IllegalArgumentException("more than one bit set in context");
 421:     if ((ranges & ~ALL_RANGES) != 0 || (defaultContext & ~ALL_RANGES) != 0)
 422:       throw new IllegalArgumentException("argument out of range");
 423:     return new NumericShaper(defaultContext, ranges);
 424:   }
 425: }