Source for java.lang.Character

   1: /* java.lang.Character -- Wrapper class for char, and Unicode subsets
   2:    Copyright (C) 1998, 1999, 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.lang;
  40: 
  41: import gnu.java.lang.CharData;
  42: 
  43: import java.io.Serializable;
  44: import java.text.Collator;
  45: import java.util.Locale;
  46: 
  47: /**
  48:  * Wrapper class for the primitive char data type.  In addition, this class
  49:  * allows one to retrieve property information and perform transformations
  50:  * on the defined characters in the Unicode Standard, Version 4.0.0.
  51:  * java.lang.Character is designed to be very dynamic, and as such, it
  52:  * retrieves information on the Unicode character set from a separate
  53:  * database, gnu.java.lang.CharData, which can be easily upgraded.
  54:  *
  55:  * <p>For predicates, boundaries are used to describe
  56:  * the set of characters for which the method will return true.
  57:  * This syntax uses fairly normal regular expression notation.
  58:  * See 5.13 of the Unicode Standard, Version 4.0, for the
  59:  * boundary specification.
  60:  *
  61:  * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
  62:  * for more information on the Unicode Standard.
  63:  *
  64:  * @author Tom Tromey (tromey@cygnus.com)
  65:  * @author Paul N. Fisher
  66:  * @author Jochen Hoenicke
  67:  * @author Eric Blake (ebb9@email.byu.edu)
  68:  * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
  69:  * @see CharData
  70:  * @since 1.0
  71:  * @status partly updated to 1.5; some things still missing
  72:  */
  73: public final class Character implements Serializable, Comparable<Character>
  74: {
  75:   /**
  76:    * A subset of Unicode blocks.
  77:    *
  78:    * @author Paul N. Fisher
  79:    * @author Eric Blake (ebb9@email.byu.edu)
  80:    * @since 1.2
  81:    */
  82:   public static class Subset
  83:   {
  84:     /** The name of the subset. */
  85:     private final String name;
  86: 
  87:     /**
  88:      * Construct a new subset of characters.
  89:      *
  90:      * @param name the name of the subset
  91:      * @throws NullPointerException if name is null
  92:      */
  93:     protected Subset(String name)
  94:     {
  95:       // Note that name.toString() is name, unless name was null.
  96:       this.name = name.toString();
  97:     }
  98: 
  99:     /**
 100:      * Compares two Subsets for equality. This is <code>final</code>, and
 101:      * restricts the comparison on the <code>==</code> operator, so it returns
 102:      * true only for the same object.
 103:      *
 104:      * @param o the object to compare
 105:      * @return true if o is this
 106:      */
 107:     public final boolean equals(Object o)
 108:     {
 109:       return o == this;
 110:     }
 111: 
 112:     /**
 113:      * Makes the original hashCode of Object final, to be consistent with
 114:      * equals.
 115:      *
 116:      * @return the hash code for this object
 117:      */
 118:     public final int hashCode()
 119:     {
 120:       return super.hashCode();
 121:     }
 122: 
 123:     /**
 124:      * Returns the name of the subset.
 125:      *
 126:      * @return the name
 127:      */
 128:     public final String toString()
 129:     {
 130:       return name;
 131:     }
 132:   } // class Subset
 133: 
 134:   /**
 135:    * A family of character subsets in the Unicode specification. A character
 136:    * is in at most one of these blocks.
 137:    *
 138:    * This inner class was generated automatically from
 139:    * <code>doc/unicode/Blocks-4.0.0.txt</code>, by some perl scripts.
 140:    * This Unicode definition file can be found on the
 141:    * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
 142:    * JDK 1.5 uses Unicode version 4.0.0.
 143:    *
 144:    * @author scripts/unicode-blocks.pl (written by Eric Blake)
 145:    * @since 1.2
 146:    */
 147:   public static final class UnicodeBlock extends Subset
 148:   {
 149:     /** The start of the subset. */
 150:     private final int start;
 151: 
 152:     /** The end of the subset. */
 153:     private final int end;
 154: 
 155:     /** The canonical name of the block according to the Unicode standard. */
 156:     private final String canonicalName;
 157: 
 158:     /** Enumeration for the <code>forName()</code> method */
 159:     private enum NameType { CANONICAL, NO_SPACES, CONSTANT; }
 160: 
 161:     /**
 162:      * Constructor for strictly defined blocks.
 163:      *
 164:      * @param start the start character of the range
 165:      * @param end the end character of the range
 166:      * @param name the block name
 167:      * @param canonicalName the name of the block as defined in the Unicode
 168:      *        standard.
 169:      */
 170:     private UnicodeBlock(int start, int end, String name,
 171:              String canonicalName)
 172:     {
 173:       super(name);
 174:       this.start = start;
 175:       this.end = end;
 176:       this.canonicalName = canonicalName;
 177:     }
 178: 
 179:     /**
 180:      * Returns the Unicode character block which a character belongs to.
 181:      * <strong>Note</strong>: This method does not support the use of
 182:      * supplementary characters.  For such support, <code>of(int)</code>
 183:      * should be used instead.
 184:      *
 185:      * @param ch the character to look up
 186:      * @return the set it belongs to, or null if it is not in one
 187:      */
 188:     public static UnicodeBlock of(char ch)
 189:     {
 190:       return of((int) ch);
 191:     }
 192: 
 193:     /**
 194:      * Returns the Unicode character block which a code point belongs to.
 195:      *
 196:      * @param codePoint the character to look up
 197:      * @return the set it belongs to, or null if it is not in one.
 198:      * @throws IllegalArgumentException if the specified code point is
 199:      *         invalid.
 200:      * @since 1.5
 201:      */
 202:     public static UnicodeBlock of(int codePoint)
 203:     {
 204:       if (codePoint > MAX_CODE_POINT)
 205:     throw new IllegalArgumentException("The supplied integer value is " +
 206:                        "too large to be a codepoint.");
 207:       // Simple binary search for the correct block.
 208:       int low = 0;
 209:       int hi = sets.length - 1;
 210:       while (low <= hi)
 211:         {
 212:           int mid = (low + hi) >> 1;
 213:           UnicodeBlock b = sets[mid];
 214:           if (codePoint < b.start)
 215:             hi = mid - 1;
 216:           else if (codePoint > b.end)
 217:             low = mid + 1;
 218:           else
 219:             return b;
 220:         }
 221:       return null;
 222:     }
 223: 
 224:     /**
 225:      * <p>
 226:      * Returns the <code>UnicodeBlock</code> with the given name, as defined
 227:      * by the Unicode standard.  The version of Unicode in use is defined by
 228:      * the <code>Character</code> class, and the names are given in the
 229:      * <code>Blocks-<version>.txt</code> file corresponding to that version.
 230:      * The name may be specified in one of three ways:
 231:      * </p>
 232:      * <ol>
 233:      * <li>The canonical, human-readable name used by the Unicode standard.
 234:      * This is the name with all spaces and hyphens retained.  For example,
 235:      * `Basic Latin' retrieves the block, UnicodeBlock.BASIC_LATIN.</li>
 236:      * <li>The canonical name with all spaces removed e.g. `BasicLatin'.</li>
 237:      * <li>The name used for the constants specified by this class, which
 238:      * is the canonical name with all spaces and hyphens replaced with
 239:      * underscores e.g. `BASIC_LATIN'</li>
 240:      * </ol>
 241:      * <p>
 242:      * The names are compared case-insensitively using the case comparison
 243:      * associated with the U.S. English locale.  The method recognises the
 244:      * previous names used for blocks as well as the current ones.  At
 245:      * present, this simply means that the deprecated `SURROGATES_AREA'
 246:      * will be recognised by this method (the <code>of()</code> methods
 247:      * only return one of the three new surrogate blocks).
 248:      * </p>
 249:      *
 250:      * @param blockName the name of the block to look up.
 251:      * @return the specified block.
 252:      * @throws NullPointerException if the <code>blockName</code> is
 253:      *         <code>null</code>.
 254:      * @throws IllegalArgumentException if the name does not match any Unicode
 255:      *         block.
 256:      * @since 1.5
 257:      */
 258:     public static final UnicodeBlock forName(String blockName)
 259:     {
 260:       NameType type;
 261:       if (blockName.indexOf(' ') != -1)
 262:         type = NameType.CANONICAL;
 263:       else if (blockName.indexOf('_') != -1)
 264:         type = NameType.CONSTANT;
 265:       else
 266:         type = NameType.NO_SPACES;
 267:       Collator usCollator = Collator.getInstance(Locale.US);
 268:       usCollator.setStrength(Collator.PRIMARY);
 269:       /* Special case for deprecated blocks not in sets */
 270:       switch (type)
 271:       {
 272:         case CANONICAL:
 273:           if (usCollator.compare(blockName, "Surrogates Area") == 0)
 274:             return SURROGATES_AREA;
 275:           break;
 276:         case NO_SPACES:
 277:           if (usCollator.compare(blockName, "SurrogatesArea") == 0)
 278:             return SURROGATES_AREA;
 279:           break;
 280:         case CONSTANT:
 281:           if (usCollator.compare(blockName, "SURROGATES_AREA") == 0) 
 282:             return SURROGATES_AREA;
 283:           break;
 284:       }
 285:       /* Other cases */
 286:       switch (type)
 287:       {
 288:         case CANONICAL:
 289:           for (UnicodeBlock block : sets)
 290:             if (usCollator.compare(blockName, block.canonicalName) == 0)
 291:               return block;
 292:           break;
 293:         case NO_SPACES:
 294:           for (UnicodeBlock block : sets)
 295:         {
 296:           String nsName = block.canonicalName.replaceAll(" ","");
 297:           if (usCollator.compare(blockName, nsName) == 0)
 298:         return block;
 299:         }
 300:       break;
 301:         case CONSTANT:
 302:           for (UnicodeBlock block : sets)
 303:             if (usCollator.compare(blockName, block.toString()) == 0)
 304:               return block;
 305:           break;
 306:       }
 307:       throw new IllegalArgumentException("No Unicode block found for " +
 308:                                          blockName + ".");
 309:     }
 310: 
 311:     /**
 312:      * Basic Latin.
 313:      * 0x0000 - 0x007F.
 314:      */
 315:     public static final UnicodeBlock BASIC_LATIN
 316:       = new UnicodeBlock(0x0000, 0x007F,
 317:                          "BASIC_LATIN", 
 318:                          "Basic Latin");
 319: 
 320:     /**
 321:      * Latin-1 Supplement.
 322:      * 0x0080 - 0x00FF.
 323:      */
 324:     public static final UnicodeBlock LATIN_1_SUPPLEMENT
 325:       = new UnicodeBlock(0x0080, 0x00FF,
 326:                          "LATIN_1_SUPPLEMENT", 
 327:                          "Latin-1 Supplement");
 328: 
 329:     /**
 330:      * Latin Extended-A.
 331:      * 0x0100 - 0x017F.
 332:      */
 333:     public static final UnicodeBlock LATIN_EXTENDED_A
 334:       = new UnicodeBlock(0x0100, 0x017F,
 335:                          "LATIN_EXTENDED_A", 
 336:                          "Latin Extended-A");
 337: 
 338:     /**
 339:      * Latin Extended-B.
 340:      * 0x0180 - 0x024F.
 341:      */
 342:     public static final UnicodeBlock LATIN_EXTENDED_B
 343:       = new UnicodeBlock(0x0180, 0x024F,
 344:                          "LATIN_EXTENDED_B", 
 345:                          "Latin Extended-B");
 346: 
 347:     /**
 348:      * IPA Extensions.
 349:      * 0x0250 - 0x02AF.
 350:      */
 351:     public static final UnicodeBlock IPA_EXTENSIONS
 352:       = new UnicodeBlock(0x0250, 0x02AF,
 353:                          "IPA_EXTENSIONS", 
 354:                          "IPA Extensions");
 355: 
 356:     /**
 357:      * Spacing Modifier Letters.
 358:      * 0x02B0 - 0x02FF.
 359:      */
 360:     public static final UnicodeBlock SPACING_MODIFIER_LETTERS
 361:       = new UnicodeBlock(0x02B0, 0x02FF,
 362:                          "SPACING_MODIFIER_LETTERS", 
 363:                          "Spacing Modifier Letters");
 364: 
 365:     /**
 366:      * Combining Diacritical Marks.
 367:      * 0x0300 - 0x036F.
 368:      */
 369:     public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
 370:       = new UnicodeBlock(0x0300, 0x036F,
 371:                          "COMBINING_DIACRITICAL_MARKS", 
 372:                          "Combining Diacritical Marks");
 373: 
 374:     /**
 375:      * Greek.
 376:      * 0x0370 - 0x03FF.
 377:      */
 378:     public static final UnicodeBlock GREEK
 379:       = new UnicodeBlock(0x0370, 0x03FF,
 380:                          "GREEK", 
 381:                          "Greek");
 382: 
 383:     /**
 384:      * Cyrillic.
 385:      * 0x0400 - 0x04FF.
 386:      */
 387:     public static final UnicodeBlock CYRILLIC
 388:       = new UnicodeBlock(0x0400, 0x04FF,
 389:                          "CYRILLIC", 
 390:                          "Cyrillic");
 391: 
 392:     /**
 393:      * Cyrillic Supplementary.
 394:      * 0x0500 - 0x052F.
 395:      * @since 1.5
 396:      */
 397:     public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
 398:       = new UnicodeBlock(0x0500, 0x052F,
 399:                          "CYRILLIC_SUPPLEMENTARY", 
 400:                          "Cyrillic Supplementary");
 401: 
 402:     /**
 403:      * Armenian.
 404:      * 0x0530 - 0x058F.
 405:      */
 406:     public static final UnicodeBlock ARMENIAN
 407:       = new UnicodeBlock(0x0530, 0x058F,
 408:                          "ARMENIAN", 
 409:                          "Armenian");
 410: 
 411:     /**
 412:      * Hebrew.
 413:      * 0x0590 - 0x05FF.
 414:      */
 415:     public static final UnicodeBlock HEBREW
 416:       = new UnicodeBlock(0x0590, 0x05FF,
 417:                          "HEBREW", 
 418:                          "Hebrew");
 419: 
 420:     /**
 421:      * Arabic.
 422:      * 0x0600 - 0x06FF.
 423:      */
 424:     public static final UnicodeBlock ARABIC
 425:       = new UnicodeBlock(0x0600, 0x06FF,
 426:                          "ARABIC", 
 427:                          "Arabic");
 428: 
 429:     /**
 430:      * Syriac.
 431:      * 0x0700 - 0x074F.
 432:      * @since 1.4
 433:      */
 434:     public static final UnicodeBlock SYRIAC
 435:       = new UnicodeBlock(0x0700, 0x074F,
 436:                          "SYRIAC", 
 437:                          "Syriac");
 438: 
 439:     /**
 440:      * Thaana.
 441:      * 0x0780 - 0x07BF.
 442:      * @since 1.4
 443:      */
 444:     public static final UnicodeBlock THAANA
 445:       = new UnicodeBlock(0x0780, 0x07BF,
 446:                          "THAANA", 
 447:                          "Thaana");
 448: 
 449:     /**
 450:      * Devanagari.
 451:      * 0x0900 - 0x097F.
 452:      */
 453:     public static final UnicodeBlock DEVANAGARI
 454:       = new UnicodeBlock(0x0900, 0x097F,
 455:                          "DEVANAGARI", 
 456:                          "Devanagari");
 457: 
 458:     /**
 459:      * Bengali.
 460:      * 0x0980 - 0x09FF.
 461:      */
 462:     public static final UnicodeBlock BENGALI
 463:       = new UnicodeBlock(0x0980, 0x09FF,
 464:                          "BENGALI", 
 465:                          "Bengali");
 466: 
 467:     /**
 468:      * Gurmukhi.
 469:      * 0x0A00 - 0x0A7F.
 470:      */
 471:     public static final UnicodeBlock GURMUKHI
 472:       = new UnicodeBlock(0x0A00, 0x0A7F,
 473:                          "GURMUKHI", 
 474:                          "Gurmukhi");
 475: 
 476:     /**
 477:      * Gujarati.
 478:      * 0x0A80 - 0x0AFF.
 479:      */
 480:     public static final UnicodeBlock GUJARATI
 481:       = new UnicodeBlock(0x0A80, 0x0AFF,
 482:                          "GUJARATI", 
 483:                          "Gujarati");
 484: 
 485:     /**
 486:      * Oriya.
 487:      * 0x0B00 - 0x0B7F.
 488:      */
 489:     public static final UnicodeBlock ORIYA
 490:       = new UnicodeBlock(0x0B00, 0x0B7F,
 491:                          "ORIYA", 
 492:                          "Oriya");
 493: 
 494:     /**
 495:      * Tamil.
 496:      * 0x0B80 - 0x0BFF.
 497:      */
 498:     public static final UnicodeBlock TAMIL
 499:       = new UnicodeBlock(0x0B80, 0x0BFF,
 500:                          "TAMIL", 
 501:                          "Tamil");
 502: 
 503:     /**
 504:      * Telugu.
 505:      * 0x0C00 - 0x0C7F.
 506:      */
 507:     public static final UnicodeBlock TELUGU
 508:       = new UnicodeBlock(0x0C00, 0x0C7F,
 509:                          "TELUGU", 
 510:                          "Telugu");
 511: 
 512:     /**
 513:      * Kannada.
 514:      * 0x0C80 - 0x0CFF.
 515:      */
 516:     public static final UnicodeBlock KANNADA
 517:       = new UnicodeBlock(0x0C80, 0x0CFF,
 518:                          "KANNADA", 
 519:                          "Kannada");
 520: 
 521:     /**
 522:      * Malayalam.
 523:      * 0x0D00 - 0x0D7F.
 524:      */
 525:     public static final UnicodeBlock MALAYALAM
 526:       = new UnicodeBlock(0x0D00, 0x0D7F,
 527:                          "MALAYALAM", 
 528:                          "Malayalam");
 529: 
 530:     /**
 531:      * Sinhala.
 532:      * 0x0D80 - 0x0DFF.
 533:      * @since 1.4
 534:      */
 535:     public static final UnicodeBlock SINHALA
 536:       = new UnicodeBlock(0x0D80, 0x0DFF,
 537:                          "SINHALA", 
 538:                          "Sinhala");
 539: 
 540:     /**
 541:      * Thai.
 542:      * 0x0E00 - 0x0E7F.
 543:      */
 544:     public static final UnicodeBlock THAI
 545:       = new UnicodeBlock(0x0E00, 0x0E7F,
 546:                          "THAI", 
 547:                          "Thai");
 548: 
 549:     /**
 550:      * Lao.
 551:      * 0x0E80 - 0x0EFF.
 552:      */
 553:     public static final UnicodeBlock LAO
 554:       = new UnicodeBlock(0x0E80, 0x0EFF,
 555:                          "LAO", 
 556:                          "Lao");
 557: 
 558:     /**
 559:      * Tibetan.
 560:      * 0x0F00 - 0x0FFF.
 561:      */
 562:     public static final UnicodeBlock TIBETAN
 563:       = new UnicodeBlock(0x0F00, 0x0FFF,
 564:                          "TIBETAN", 
 565:                          "Tibetan");
 566: 
 567:     /**
 568:      * Myanmar.
 569:      * 0x1000 - 0x109F.
 570:      * @since 1.4
 571:      */
 572:     public static final UnicodeBlock MYANMAR
 573:       = new UnicodeBlock(0x1000, 0x109F,
 574:                          "MYANMAR", 
 575:                          "Myanmar");
 576: 
 577:     /**
 578:      * Georgian.
 579:      * 0x10A0 - 0x10FF.
 580:      */
 581:     public static final UnicodeBlock GEORGIAN
 582:       = new UnicodeBlock(0x10A0, 0x10FF,
 583:                          "GEORGIAN", 
 584:                          "Georgian");
 585: 
 586:     /**
 587:      * Hangul Jamo.
 588:      * 0x1100 - 0x11FF.
 589:      */
 590:     public static final UnicodeBlock HANGUL_JAMO
 591:       = new UnicodeBlock(0x1100, 0x11FF,
 592:                          "HANGUL_JAMO", 
 593:                          "Hangul Jamo");
 594: 
 595:     /**
 596:      * Ethiopic.
 597:      * 0x1200 - 0x137F.
 598:      * @since 1.4
 599:      */
 600:     public static final UnicodeBlock ETHIOPIC
 601:       = new UnicodeBlock(0x1200, 0x137F,
 602:                          "ETHIOPIC", 
 603:                          "Ethiopic");
 604: 
 605:     /**
 606:      * Cherokee.
 607:      * 0x13A0 - 0x13FF.
 608:      * @since 1.4
 609:      */
 610:     public static final UnicodeBlock CHEROKEE
 611:       = new UnicodeBlock(0x13A0, 0x13FF,
 612:                          "CHEROKEE", 
 613:                          "Cherokee");
 614: 
 615:     /**
 616:      * Unified Canadian Aboriginal Syllabics.
 617:      * 0x1400 - 0x167F.
 618:      * @since 1.4
 619:      */
 620:     public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
 621:       = new UnicodeBlock(0x1400, 0x167F,
 622:                          "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 
 623:                          "Unified Canadian Aboriginal Syllabics");
 624: 
 625:     /**
 626:      * Ogham.
 627:      * 0x1680 - 0x169F.
 628:      * @since 1.4
 629:      */
 630:     public static final UnicodeBlock OGHAM
 631:       = new UnicodeBlock(0x1680, 0x169F,
 632:                          "OGHAM", 
 633:                          "Ogham");
 634: 
 635:     /**
 636:      * Runic.
 637:      * 0x16A0 - 0x16FF.
 638:      * @since 1.4
 639:      */
 640:     public static final UnicodeBlock RUNIC
 641:       = new UnicodeBlock(0x16A0, 0x16FF,
 642:                          "RUNIC", 
 643:                          "Runic");
 644: 
 645:     /**
 646:      * Tagalog.
 647:      * 0x1700 - 0x171F.
 648:      * @since 1.5
 649:      */
 650:     public static final UnicodeBlock TAGALOG
 651:       = new UnicodeBlock(0x1700, 0x171F,
 652:                          "TAGALOG", 
 653:                          "Tagalog");
 654: 
 655:     /**
 656:      * Hanunoo.
 657:      * 0x1720 - 0x173F.
 658:      * @since 1.5
 659:      */
 660:     public static final UnicodeBlock HANUNOO
 661:       = new UnicodeBlock(0x1720, 0x173F,
 662:                          "HANUNOO", 
 663:                          "Hanunoo");
 664: 
 665:     /**
 666:      * Buhid.
 667:      * 0x1740 - 0x175F.
 668:      * @since 1.5
 669:      */
 670:     public static final UnicodeBlock BUHID
 671:       = new UnicodeBlock(0x1740, 0x175F,
 672:                          "BUHID", 
 673:                          "Buhid");
 674: 
 675:     /**
 676:      * Tagbanwa.
 677:      * 0x1760 - 0x177F.
 678:      * @since 1.5
 679:      */
 680:     public static final UnicodeBlock TAGBANWA
 681:       = new UnicodeBlock(0x1760, 0x177F,
 682:                          "TAGBANWA", 
 683:                          "Tagbanwa");
 684: 
 685:     /**
 686:      * Khmer.
 687:      * 0x1780 - 0x17FF.
 688:      * @since 1.4
 689:      */
 690:     public static final UnicodeBlock KHMER
 691:       = new UnicodeBlock(0x1780, 0x17FF,
 692:                          "KHMER", 
 693:                          "Khmer");
 694: 
 695:     /**
 696:      * Mongolian.
 697:      * 0x1800 - 0x18AF.
 698:      * @since 1.4
 699:      */
 700:     public static final UnicodeBlock MONGOLIAN
 701:       = new UnicodeBlock(0x1800, 0x18AF,
 702:                          "MONGOLIAN", 
 703:                          "Mongolian");
 704: 
 705:     /**
 706:      * Limbu.
 707:      * 0x1900 - 0x194F.
 708:      * @since 1.5
 709:      */
 710:     public static final UnicodeBlock LIMBU
 711:       = new UnicodeBlock(0x1900, 0x194F,
 712:                          "LIMBU", 
 713:                          "Limbu");
 714: 
 715:     /**
 716:      * Tai Le.
 717:      * 0x1950 - 0x197F.
 718:      * @since 1.5
 719:      */
 720:     public static final UnicodeBlock TAI_LE
 721:       = new UnicodeBlock(0x1950, 0x197F,
 722:                          "TAI_LE", 
 723:                          "Tai Le");
 724: 
 725:     /**
 726:      * Khmer Symbols.
 727:      * 0x19E0 - 0x19FF.
 728:      * @since 1.5
 729:      */
 730:     public static final UnicodeBlock KHMER_SYMBOLS
 731:       = new UnicodeBlock(0x19E0, 0x19FF,
 732:                          "KHMER_SYMBOLS", 
 733:                          "Khmer Symbols");
 734: 
 735:     /**
 736:      * Phonetic Extensions.
 737:      * 0x1D00 - 0x1D7F.
 738:      * @since 1.5
 739:      */
 740:     public static final UnicodeBlock PHONETIC_EXTENSIONS
 741:       = new UnicodeBlock(0x1D00, 0x1D7F,
 742:                          "PHONETIC_EXTENSIONS", 
 743:                          "Phonetic Extensions");
 744: 
 745:     /**
 746:      * Latin Extended Additional.
 747:      * 0x1E00 - 0x1EFF.
 748:      */
 749:     public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
 750:       = new UnicodeBlock(0x1E00, 0x1EFF,
 751:                          "LATIN_EXTENDED_ADDITIONAL", 
 752:                          "Latin Extended Additional");
 753: 
 754:     /**
 755:      * Greek Extended.
 756:      * 0x1F00 - 0x1FFF.
 757:      */
 758:     public static final UnicodeBlock GREEK_EXTENDED
 759:       = new UnicodeBlock(0x1F00, 0x1FFF,
 760:                          "GREEK_EXTENDED", 
 761:                          "Greek Extended");
 762: 
 763:     /**
 764:      * General Punctuation.
 765:      * 0x2000 - 0x206F.
 766:      */
 767:     public static final UnicodeBlock GENERAL_PUNCTUATION
 768:       = new UnicodeBlock(0x2000, 0x206F,
 769:                          "GENERAL_PUNCTUATION", 
 770:                          "General Punctuation");
 771: 
 772:     /**
 773:      * Superscripts and Subscripts.
 774:      * 0x2070 - 0x209F.
 775:      */
 776:     public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
 777:       = new UnicodeBlock(0x2070, 0x209F,
 778:                          "SUPERSCRIPTS_AND_SUBSCRIPTS", 
 779:                          "Superscripts and Subscripts");
 780: 
 781:     /**
 782:      * Currency Symbols.
 783:      * 0x20A0 - 0x20CF.
 784:      */
 785:     public static final UnicodeBlock CURRENCY_SYMBOLS
 786:       = new UnicodeBlock(0x20A0, 0x20CF,
 787:                          "CURRENCY_SYMBOLS", 
 788:                          "Currency Symbols");
 789: 
 790:     /**
 791:      * Combining Marks for Symbols.
 792:      * 0x20D0 - 0x20FF.
 793:      */
 794:     public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
 795:       = new UnicodeBlock(0x20D0, 0x20FF,
 796:                          "COMBINING_MARKS_FOR_SYMBOLS", 
 797:                          "Combining Marks for Symbols");
 798: 
 799:     /**
 800:      * Letterlike Symbols.
 801:      * 0x2100 - 0x214F.
 802:      */
 803:     public static final UnicodeBlock LETTERLIKE_SYMBOLS
 804:       = new UnicodeBlock(0x2100, 0x214F,
 805:                          "LETTERLIKE_SYMBOLS", 
 806:                          "Letterlike Symbols");
 807: 
 808:     /**
 809:      * Number Forms.
 810:      * 0x2150 - 0x218F.
 811:      */
 812:     public static final UnicodeBlock NUMBER_FORMS
 813:       = new UnicodeBlock(0x2150, 0x218F,
 814:                          "NUMBER_FORMS", 
 815:                          "Number Forms");
 816: 
 817:     /**
 818:      * Arrows.
 819:      * 0x2190 - 0x21FF.
 820:      */
 821:     public static final UnicodeBlock ARROWS
 822:       = new UnicodeBlock(0x2190, 0x21FF,
 823:                          "ARROWS", 
 824:                          "Arrows");
 825: 
 826:     /**
 827:      * Mathematical Operators.
 828:      * 0x2200 - 0x22FF.
 829:      */
 830:     public static final UnicodeBlock MATHEMATICAL_OPERATORS
 831:       = new UnicodeBlock(0x2200, 0x22FF,
 832:                          "MATHEMATICAL_OPERATORS", 
 833:                          "Mathematical Operators");
 834: 
 835:     /**
 836:      * Miscellaneous Technical.
 837:      * 0x2300 - 0x23FF.
 838:      */
 839:     public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
 840:       = new UnicodeBlock(0x2300, 0x23FF,
 841:                          "MISCELLANEOUS_TECHNICAL", 
 842:                          "Miscellaneous Technical");
 843: 
 844:     /**
 845:      * Control Pictures.
 846:      * 0x2400 - 0x243F.
 847:      */
 848:     public static final UnicodeBlock CONTROL_PICTURES
 849:       = new UnicodeBlock(0x2400, 0x243F,
 850:                          "CONTROL_PICTURES", 
 851:                          "Control Pictures");
 852: 
 853:     /**
 854:      * Optical Character Recognition.
 855:      * 0x2440 - 0x245F.
 856:      */
 857:     public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
 858:       = new UnicodeBlock(0x2440, 0x245F,
 859:                          "OPTICAL_CHARACTER_RECOGNITION", 
 860:                          "Optical Character Recognition");
 861: 
 862:     /**
 863:      * Enclosed Alphanumerics.
 864:      * 0x2460 - 0x24FF.
 865:      */
 866:     public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
 867:       = new UnicodeBlock(0x2460, 0x24FF,
 868:                          "ENCLOSED_ALPHANUMERICS", 
 869:                          "Enclosed Alphanumerics");
 870: 
 871:     /**
 872:      * Box Drawing.
 873:      * 0x2500 - 0x257F.
 874:      */
 875:     public static final UnicodeBlock BOX_DRAWING
 876:       = new UnicodeBlock(0x2500, 0x257F,
 877:                          "BOX_DRAWING", 
 878:                          "Box Drawing");
 879: 
 880:     /**
 881:      * Block Elements.
 882:      * 0x2580 - 0x259F.
 883:      */
 884:     public static final UnicodeBlock BLOCK_ELEMENTS
 885:       = new UnicodeBlock(0x2580, 0x259F,
 886:                          "BLOCK_ELEMENTS", 
 887:                          "Block Elements");
 888: 
 889:     /**
 890:      * Geometric Shapes.
 891:      * 0x25A0 - 0x25FF.
 892:      */
 893:     public static final UnicodeBlock GEOMETRIC_SHAPES
 894:       = new UnicodeBlock(0x25A0, 0x25FF,
 895:                          "GEOMETRIC_SHAPES", 
 896:                          "Geometric Shapes");
 897: 
 898:     /**
 899:      * Miscellaneous Symbols.
 900:      * 0x2600 - 0x26FF.
 901:      */
 902:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
 903:       = new UnicodeBlock(0x2600, 0x26FF,
 904:                          "MISCELLANEOUS_SYMBOLS", 
 905:                          "Miscellaneous Symbols");
 906: 
 907:     /**
 908:      * Dingbats.
 909:      * 0x2700 - 0x27BF.
 910:      */
 911:     public static final UnicodeBlock DINGBATS
 912:       = new UnicodeBlock(0x2700, 0x27BF,
 913:                          "DINGBATS", 
 914:                          "Dingbats");
 915: 
 916:     /**
 917:      * Miscellaneous Mathematical Symbols-A.
 918:      * 0x27C0 - 0x27EF.
 919:      * @since 1.5
 920:      */
 921:     public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
 922:       = new UnicodeBlock(0x27C0, 0x27EF,
 923:                          "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 
 924:                          "Miscellaneous Mathematical Symbols-A");
 925: 
 926:     /**
 927:      * Supplemental Arrows-A.
 928:      * 0x27F0 - 0x27FF.
 929:      * @since 1.5
 930:      */
 931:     public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
 932:       = new UnicodeBlock(0x27F0, 0x27FF,
 933:                          "SUPPLEMENTAL_ARROWS_A", 
 934:                          "Supplemental Arrows-A");
 935: 
 936:     /**
 937:      * Braille Patterns.
 938:      * 0x2800 - 0x28FF.
 939:      * @since 1.4
 940:      */
 941:     public static final UnicodeBlock BRAILLE_PATTERNS
 942:       = new UnicodeBlock(0x2800, 0x28FF,
 943:                          "BRAILLE_PATTERNS", 
 944:                          "Braille Patterns");
 945: 
 946:     /**
 947:      * Supplemental Arrows-B.
 948:      * 0x2900 - 0x297F.
 949:      * @since 1.5
 950:      */
 951:     public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
 952:       = new UnicodeBlock(0x2900, 0x297F,
 953:                          "SUPPLEMENTAL_ARROWS_B", 
 954:                          "Supplemental Arrows-B");
 955: 
 956:     /**
 957:      * Miscellaneous Mathematical Symbols-B.
 958:      * 0x2980 - 0x29FF.
 959:      * @since 1.5
 960:      */
 961:     public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
 962:       = new UnicodeBlock(0x2980, 0x29FF,
 963:                          "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 
 964:                          "Miscellaneous Mathematical Symbols-B");
 965: 
 966:     /**
 967:      * Supplemental Mathematical Operators.
 968:      * 0x2A00 - 0x2AFF.
 969:      * @since 1.5
 970:      */
 971:     public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
 972:       = new UnicodeBlock(0x2A00, 0x2AFF,
 973:                          "SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 
 974:                          "Supplemental Mathematical Operators");
 975: 
 976:     /**
 977:      * Miscellaneous Symbols and Arrows.
 978:      * 0x2B00 - 0x2BFF.
 979:      * @since 1.5
 980:      */
 981:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
 982:       = new UnicodeBlock(0x2B00, 0x2BFF,
 983:                          "MISCELLANEOUS_SYMBOLS_AND_ARROWS", 
 984:                          "Miscellaneous Symbols and Arrows");
 985: 
 986:     /**
 987:      * CJK Radicals Supplement.
 988:      * 0x2E80 - 0x2EFF.
 989:      * @since 1.4
 990:      */
 991:     public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
 992:       = new UnicodeBlock(0x2E80, 0x2EFF,
 993:                          "CJK_RADICALS_SUPPLEMENT", 
 994:                          "CJK Radicals Supplement");
 995: 
 996:     /**
 997:      * Kangxi Radicals.
 998:      * 0x2F00 - 0x2FDF.
 999:      * @since 1.4
1000:      */
1001:     public static final UnicodeBlock KANGXI_RADICALS
1002:       = new UnicodeBlock(0x2F00, 0x2FDF,
1003:                          "KANGXI_RADICALS", 
1004:                          "Kangxi Radicals");
1005: 
1006:     /**
1007:      * Ideographic Description Characters.
1008:      * 0x2FF0 - 0x2FFF.
1009:      * @since 1.4
1010:      */
1011:     public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1012:       = new UnicodeBlock(0x2FF0, 0x2FFF,
1013:                          "IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 
1014:                          "Ideographic Description Characters");
1015: 
1016:     /**
1017:      * CJK Symbols and Punctuation.
1018:      * 0x3000 - 0x303F.
1019:      */
1020:     public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1021:       = new UnicodeBlock(0x3000, 0x303F,
1022:                          "CJK_SYMBOLS_AND_PUNCTUATION", 
1023:                          "CJK Symbols and Punctuation");
1024: 
1025:     /**
1026:      * Hiragana.
1027:      * 0x3040 - 0x309F.
1028:      */
1029:     public static final UnicodeBlock HIRAGANA
1030:       = new UnicodeBlock(0x3040, 0x309F,
1031:                          "HIRAGANA", 
1032:                          "Hiragana");
1033: 
1034:     /**
1035:      * Katakana.
1036:      * 0x30A0 - 0x30FF.
1037:      */
1038:     public static final UnicodeBlock KATAKANA
1039:       = new UnicodeBlock(0x30A0, 0x30FF,
1040:                          "KATAKANA", 
1041:                          "Katakana");
1042: 
1043:     /**
1044:      * Bopomofo.
1045:      * 0x3100 - 0x312F.
1046:      */
1047:     public static final UnicodeBlock BOPOMOFO
1048:       = new UnicodeBlock(0x3100, 0x312F,
1049:                          "BOPOMOFO", 
1050:                          "Bopomofo");
1051: 
1052:     /**
1053:      * Hangul Compatibility Jamo.
1054:      * 0x3130 - 0x318F.
1055:      */
1056:     public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1057:       = new UnicodeBlock(0x3130, 0x318F,
1058:                          "HANGUL_COMPATIBILITY_JAMO", 
1059:                          "Hangul Compatibility Jamo");
1060: 
1061:     /**
1062:      * Kanbun.
1063:      * 0x3190 - 0x319F.
1064:      */
1065:     public static final UnicodeBlock KANBUN
1066:       = new UnicodeBlock(0x3190, 0x319F,
1067:                          "KANBUN", 
1068:                          "Kanbun");
1069: 
1070:     /**
1071:      * Bopomofo Extended.
1072:      * 0x31A0 - 0x31BF.
1073:      * @since 1.4
1074:      */
1075:     public static final UnicodeBlock BOPOMOFO_EXTENDED
1076:       = new UnicodeBlock(0x31A0, 0x31BF,
1077:                          "BOPOMOFO_EXTENDED", 
1078:                          "Bopomofo Extended");
1079: 
1080:     /**
1081:      * Katakana Phonetic Extensions.
1082:      * 0x31F0 - 0x31FF.
1083:      * @since 1.5
1084:      */
1085:     public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1086:       = new UnicodeBlock(0x31F0, 0x31FF,
1087:                          "KATAKANA_PHONETIC_EXTENSIONS", 
1088:                          "Katakana Phonetic Extensions");
1089: 
1090:     /**
1091:      * Enclosed CJK Letters and Months.
1092:      * 0x3200 - 0x32FF.
1093:      */
1094:     public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1095:       = new UnicodeBlock(0x3200, 0x32FF,
1096:                          "ENCLOSED_CJK_LETTERS_AND_MONTHS", 
1097:                          "Enclosed CJK Letters and Months");
1098: 
1099:     /**
1100:      * CJK Compatibility.
1101:      * 0x3300 - 0x33FF.
1102:      */
1103:     public static final UnicodeBlock CJK_COMPATIBILITY
1104:       = new UnicodeBlock(0x3300, 0x33FF,
1105:                          "CJK_COMPATIBILITY", 
1106:                          "CJK Compatibility");
1107: 
1108:     /**
1109:      * CJK Unified Ideographs Extension A.
1110:      * 0x3400 - 0x4DBF.
1111:      * @since 1.4
1112:      */
1113:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1114:       = new UnicodeBlock(0x3400, 0x4DBF,
1115:                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 
1116:                          "CJK Unified Ideographs Extension A");
1117: 
1118:     /**
1119:      * Yijing Hexagram Symbols.
1120:      * 0x4DC0 - 0x4DFF.
1121:      * @since 1.5
1122:      */
1123:     public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1124:       = new UnicodeBlock(0x4DC0, 0x4DFF,
1125:                          "YIJING_HEXAGRAM_SYMBOLS", 
1126:                          "Yijing Hexagram Symbols");
1127: 
1128:     /**
1129:      * CJK Unified Ideographs.
1130:      * 0x4E00 - 0x9FFF.
1131:      */
1132:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1133:       = new UnicodeBlock(0x4E00, 0x9FFF,
1134:                          "CJK_UNIFIED_IDEOGRAPHS", 
1135:                          "CJK Unified Ideographs");
1136: 
1137:     /**
1138:      * Yi Syllables.
1139:      * 0xA000 - 0xA48F.
1140:      * @since 1.4
1141:      */
1142:     public static final UnicodeBlock YI_SYLLABLES
1143:       = new UnicodeBlock(0xA000, 0xA48F,
1144:                          "YI_SYLLABLES", 
1145:                          "Yi Syllables");
1146: 
1147:     /**
1148:      * Yi Radicals.
1149:      * 0xA490 - 0xA4CF.
1150:      * @since 1.4
1151:      */
1152:     public static final UnicodeBlock YI_RADICALS
1153:       = new UnicodeBlock(0xA490, 0xA4CF,
1154:                          "YI_RADICALS", 
1155:                          "Yi Radicals");
1156: 
1157:     /**
1158:      * Hangul Syllables.
1159:      * 0xAC00 - 0xD7AF.
1160:      */
1161:     public static final UnicodeBlock HANGUL_SYLLABLES
1162:       = new UnicodeBlock(0xAC00, 0xD7AF,
1163:                          "HANGUL_SYLLABLES", 
1164:                          "Hangul Syllables");
1165: 
1166:     /**
1167:      * High Surrogates.
1168:      * 0xD800 - 0xDB7F.
1169:      * @since 1.5
1170:      */
1171:     public static final UnicodeBlock HIGH_SURROGATES
1172:       = new UnicodeBlock(0xD800, 0xDB7F,
1173:                          "HIGH_SURROGATES", 
1174:                          "High Surrogates");
1175: 
1176:     /**
1177:      * High Private Use Surrogates.
1178:      * 0xDB80 - 0xDBFF.
1179:      * @since 1.5
1180:      */
1181:     public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1182:       = new UnicodeBlock(0xDB80, 0xDBFF,
1183:                          "HIGH_PRIVATE_USE_SURROGATES", 
1184:                          "High Private Use Surrogates");
1185: 
1186:     /**
1187:      * Low Surrogates.
1188:      * 0xDC00 - 0xDFFF.
1189:      * @since 1.5
1190:      */
1191:     public static final UnicodeBlock LOW_SURROGATES
1192:       = new UnicodeBlock(0xDC00, 0xDFFF,
1193:                          "LOW_SURROGATES", 
1194:                          "Low Surrogates");
1195: 
1196:     /**
1197:      * Private Use Area.
1198:      * 0xE000 - 0xF8FF.
1199:      */
1200:     public static final UnicodeBlock PRIVATE_USE_AREA
1201:       = new UnicodeBlock(0xE000, 0xF8FF,
1202:                          "PRIVATE_USE_AREA", 
1203:                          "Private Use Area");
1204: 
1205:     /**
1206:      * CJK Compatibility Ideographs.
1207:      * 0xF900 - 0xFAFF.
1208:      */
1209:     public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1210:       = new UnicodeBlock(0xF900, 0xFAFF,
1211:                          "CJK_COMPATIBILITY_IDEOGRAPHS", 
1212:                          "CJK Compatibility Ideographs");
1213: 
1214:     /**
1215:      * Alphabetic Presentation Forms.
1216:      * 0xFB00 - 0xFB4F.
1217:      */
1218:     public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1219:       = new UnicodeBlock(0xFB00, 0xFB4F,
1220:                          "ALPHABETIC_PRESENTATION_FORMS", 
1221:                          "Alphabetic Presentation Forms");
1222: 
1223:     /**
1224:      * Arabic Presentation Forms-A.
1225:      * 0xFB50 - 0xFDFF.
1226:      */
1227:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1228:       = new UnicodeBlock(0xFB50, 0xFDFF,
1229:                          "ARABIC_PRESENTATION_FORMS_A", 
1230:                          "Arabic Presentation Forms-A");
1231: 
1232:     /**
1233:      * Variation Selectors.
1234:      * 0xFE00 - 0xFE0F.
1235:      * @since 1.5
1236:      */
1237:     public static final UnicodeBlock VARIATION_SELECTORS
1238:       = new UnicodeBlock(0xFE00, 0xFE0F,
1239:                          "VARIATION_SELECTORS", 
1240:                          "Variation Selectors");
1241: 
1242:     /**
1243:      * Combining Half Marks.
1244:      * 0xFE20 - 0xFE2F.
1245:      */
1246:     public static final UnicodeBlock COMBINING_HALF_MARKS
1247:       = new UnicodeBlock(0xFE20, 0xFE2F,
1248:                          "COMBINING_HALF_MARKS", 
1249:                          "Combining Half Marks");
1250: 
1251:     /**
1252:      * CJK Compatibility Forms.
1253:      * 0xFE30 - 0xFE4F.
1254:      */
1255:     public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1256:       = new UnicodeBlock(0xFE30, 0xFE4F,
1257:                          "CJK_COMPATIBILITY_FORMS", 
1258:                          "CJK Compatibility Forms");
1259: 
1260:     /**
1261:      * Small Form Variants.
1262:      * 0xFE50 - 0xFE6F.
1263:      */
1264:     public static final UnicodeBlock SMALL_FORM_VARIANTS
1265:       = new UnicodeBlock(0xFE50, 0xFE6F,
1266:                          "SMALL_FORM_VARIANTS", 
1267:                          "Small Form Variants");
1268: 
1269:     /**
1270:      * Arabic Presentation Forms-B.
1271:      * 0xFE70 - 0xFEFF.
1272:      */
1273:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1274:       = new UnicodeBlock(0xFE70, 0xFEFF,
1275:                          "ARABIC_PRESENTATION_FORMS_B", 
1276:                          "Arabic Presentation Forms-B");
1277: 
1278:     /**
1279:      * Halfwidth and Fullwidth Forms.
1280:      * 0xFF00 - 0xFFEF.
1281:      */
1282:     public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1283:       = new UnicodeBlock(0xFF00, 0xFFEF,
1284:                          "HALFWIDTH_AND_FULLWIDTH_FORMS", 
1285:                          "Halfwidth and Fullwidth Forms");
1286: 
1287:     /**
1288:      * Specials.
1289:      * 0xFFF0 - 0xFFFF.
1290:      */
1291:     public static final UnicodeBlock SPECIALS
1292:       = new UnicodeBlock(0xFFF0, 0xFFFF,
1293:                          "SPECIALS", 
1294:                          "Specials");
1295: 
1296:     /**
1297:      * Linear B Syllabary.
1298:      * 0x10000 - 0x1007F.
1299:      * @since 1.5
1300:      */
1301:     public static final UnicodeBlock LINEAR_B_SYLLABARY
1302:       = new UnicodeBlock(0x10000, 0x1007F,
1303:                          "LINEAR_B_SYLLABARY", 
1304:                          "Linear B Syllabary");
1305: 
1306:     /**
1307:      * Linear B Ideograms.
1308:      * 0x10080 - 0x100FF.
1309:      * @since 1.5
1310:      */
1311:     public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1312:       = new UnicodeBlock(0x10080, 0x100FF,
1313:                          "LINEAR_B_IDEOGRAMS", 
1314:                          "Linear B Ideograms");
1315: 
1316:     /**
1317:      * Aegean Numbers.
1318:      * 0x10100 - 0x1013F.
1319:      * @since 1.5
1320:      */
1321:     public static final UnicodeBlock AEGEAN_NUMBERS
1322:       = new UnicodeBlock(0x10100, 0x1013F,
1323:                          "AEGEAN_NUMBERS", 
1324:                          "Aegean Numbers");
1325: 
1326:     /**
1327:      * Old Italic.
1328:      * 0x10300 - 0x1032F.
1329:      * @since 1.5
1330:      */
1331:     public static final UnicodeBlock OLD_ITALIC
1332:       = new UnicodeBlock(0x10300, 0x1032F,
1333:                          "OLD_ITALIC", 
1334:                          "Old Italic");
1335: 
1336:     /**
1337:      * Gothic.
1338:      * 0x10330 - 0x1034F.
1339:      * @since 1.5
1340:      */
1341:     public static final UnicodeBlock GOTHIC
1342:       = new UnicodeBlock(0x10330, 0x1034F,
1343:                          "GOTHIC", 
1344:                          "Gothic");
1345: 
1346:     /**
1347:      * Ugaritic.
1348:      * 0x10380 - 0x1039F.
1349:      * @since 1.5
1350:      */
1351:     public static final UnicodeBlock UGARITIC
1352:       = new UnicodeBlock(0x10380, 0x1039F,
1353:                          "UGARITIC", 
1354:                          "Ugaritic");
1355: 
1356:     /**
1357:      * Deseret.
1358:      * 0x10400 - 0x1044F.
1359:      * @since 1.5
1360:      */
1361:     public static final UnicodeBlock DESERET
1362:       = new UnicodeBlock(0x10400, 0x1044F,
1363:                          "DESERET", 
1364:                          "Deseret");
1365: 
1366:     /**
1367:      * Shavian.
1368:      * 0x10450 - 0x1047F.
1369:      * @since 1.5
1370:      */
1371:     public static final UnicodeBlock SHAVIAN
1372:       = new UnicodeBlock(0x10450, 0x1047F,
1373:                          "SHAVIAN", 
1374:                          "Shavian");
1375: 
1376:     /**
1377:      * Osmanya.
1378:      * 0x10480 - 0x104AF.
1379:      * @since 1.5
1380:      */
1381:     public static final UnicodeBlock OSMANYA
1382:       = new UnicodeBlock(0x10480, 0x104AF,
1383:                          "OSMANYA", 
1384:                          "Osmanya");
1385: 
1386:     /**
1387:      * Cypriot Syllabary.
1388:      * 0x10800 - 0x1083F.
1389:      * @since 1.5
1390:      */
1391:     public static final UnicodeBlock CYPRIOT_SYLLABARY
1392:       = new UnicodeBlock(0x10800, 0x1083F,
1393:                          "CYPRIOT_SYLLABARY", 
1394:                          "Cypriot Syllabary");
1395: 
1396:     /**
1397:      * Byzantine Musical Symbols.
1398:      * 0x1D000 - 0x1D0FF.
1399:      * @since 1.5
1400:      */
1401:     public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1402:       = new UnicodeBlock(0x1D000, 0x1D0FF,
1403:                          "BYZANTINE_MUSICAL_SYMBOLS", 
1404:                          "Byzantine Musical Symbols");
1405: 
1406:     /**
1407:      * Musical Symbols.
1408:      * 0x1D100 - 0x1D1FF.
1409:      * @since 1.5
1410:      */
1411:     public static final UnicodeBlock MUSICAL_SYMBOLS
1412:       = new UnicodeBlock(0x1D100, 0x1D1FF,
1413:                          "MUSICAL_SYMBOLS", 
1414:                          "Musical Symbols");
1415: 
1416:     /**
1417:      * Tai Xuan Jing Symbols.
1418:      * 0x1D300 - 0x1D35F.
1419:      * @since 1.5
1420:      */
1421:     public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1422:       = new UnicodeBlock(0x1D300, 0x1D35F,
1423:                          "TAI_XUAN_JING_SYMBOLS", 
1424:                          "Tai Xuan Jing Symbols");
1425: 
1426:     /**
1427:      * Mathematical Alphanumeric Symbols.
1428:      * 0x1D400 - 0x1D7FF.
1429:      * @since 1.5
1430:      */
1431:     public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1432:       = new UnicodeBlock(0x1D400, 0x1D7FF,
1433:                          "MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 
1434:                          "Mathematical Alphanumeric Symbols");
1435: 
1436:     /**
1437:      * CJK Unified Ideographs Extension B.
1438:      * 0x20000 - 0x2A6DF.
1439:      * @since 1.5
1440:      */
1441:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1442:       = new UnicodeBlock(0x20000, 0x2A6DF,
1443:                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 
1444:                          "CJK Unified Ideographs Extension B");
1445: 
1446:     /**
1447:      * CJK Compatibility Ideographs Supplement.
1448:      * 0x2F800 - 0x2FA1F.
1449:      * @since 1.5
1450:      */
1451:     public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1452:       = new UnicodeBlock(0x2F800, 0x2FA1F,
1453:                          "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 
1454:                          "CJK Compatibility Ideographs Supplement");
1455: 
1456:     /**
1457:      * Tags.
1458:      * 0xE0000 - 0xE007F.
1459:      * @since 1.5
1460:      */
1461:     public static final UnicodeBlock TAGS
1462:       = new UnicodeBlock(0xE0000, 0xE007F,
1463:                          "TAGS", 
1464:                          "Tags");
1465: 
1466:     /**
1467:      * Variation Selectors Supplement.
1468:      * 0xE0100 - 0xE01EF.
1469:      * @since 1.5
1470:      */
1471:     public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1472:       = new UnicodeBlock(0xE0100, 0xE01EF,
1473:                          "VARIATION_SELECTORS_SUPPLEMENT", 
1474:                          "Variation Selectors Supplement");
1475: 
1476:     /**
1477:      * Supplementary Private Use Area-A.
1478:      * 0xF0000 - 0xFFFFF.
1479:      * @since 1.5
1480:      */
1481:     public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1482:       = new UnicodeBlock(0xF0000, 0xFFFFF,
1483:                          "SUPPLEMENTARY_PRIVATE_USE_AREA_A", 
1484:                          "Supplementary Private Use Area-A");
1485: 
1486:     /**
1487:      * Supplementary Private Use Area-B.
1488:      * 0x100000 - 0x10FFFF.
1489:      * @since 1.5
1490:      */
1491:     public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1492:       = new UnicodeBlock(0x100000, 0x10FFFF,
1493:                          "SUPPLEMENTARY_PRIVATE_USE_AREA_B", 
1494:                          "Supplementary Private Use Area-B");
1495: 
1496:     /**
1497:      * Surrogates Area.
1498:      * 'D800' - 'DFFF'.
1499:      * @deprecated As of 1.5, the three areas, 
1500:      * <a href="#HIGH_SURROGATES">HIGH_SURROGATES</a>,
1501:      * <a href="#HIGH_PRIVATE_USE_SURROGATES">HIGH_PRIVATE_USE_SURROGATES</a>
1502:      * and <a href="#LOW_SURROGATES">LOW_SURROGATES</a>, as defined
1503:      * by the Unicode standard, should be used in preference to
1504:      * this.  These are also returned from calls to <code>of(int)</code>
1505:      * and <code>of(char)</code>.
1506:      */
1507:     @Deprecated
1508:     public static final UnicodeBlock SURROGATES_AREA
1509:       = new UnicodeBlock(0xD800, 0xDFFF,
1510:                          "SURROGATES_AREA",
1511:              "Surrogates Area");
1512: 
1513:     /**
1514:      * The defined subsets.
1515:      */
1516:     private static final UnicodeBlock sets[] = {
1517:       BASIC_LATIN,
1518:       LATIN_1_SUPPLEMENT,
1519:       LATIN_EXTENDED_A,
1520:       LATIN_EXTENDED_B,
1521:       IPA_EXTENSIONS,
1522:       SPACING_MODIFIER_LETTERS,
1523:       COMBINING_DIACRITICAL_MARKS,
1524:       GREEK,
1525:       CYRILLIC,
1526:       CYRILLIC_SUPPLEMENTARY,
1527:       ARMENIAN,
1528:       HEBREW,
1529:       ARABIC,
1530:       SYRIAC,
1531:       THAANA,
1532:       DEVANAGARI,
1533:       BENGALI,
1534:       GURMUKHI,
1535:       GUJARATI,
1536:       ORIYA,
1537:       TAMIL,
1538:       TELUGU,
1539:       KANNADA,
1540:       MALAYALAM,
1541:       SINHALA,
1542:       THAI,
1543:       LAO,
1544:       TIBETAN,
1545:       MYANMAR,
1546:       GEORGIAN,
1547:       HANGUL_JAMO,
1548:       ETHIOPIC,
1549:       CHEROKEE,
1550:       UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1551:       OGHAM,
1552:       RUNIC,
1553:       TAGALOG,
1554:       HANUNOO,
1555:       BUHID,
1556:       TAGBANWA,
1557:       KHMER,
1558:       MONGOLIAN,
1559:       LIMBU,
1560:       TAI_LE,
1561:       KHMER_SYMBOLS,
1562:       PHONETIC_EXTENSIONS,
1563:       LATIN_EXTENDED_ADDITIONAL,
1564:       GREEK_EXTENDED,
1565:       GENERAL_PUNCTUATION,
1566:       SUPERSCRIPTS_AND_SUBSCRIPTS,
1567:       CURRENCY_SYMBOLS,
1568:       COMBINING_MARKS_FOR_SYMBOLS,
1569:       LETTERLIKE_SYMBOLS,
1570:       NUMBER_FORMS,
1571:       ARROWS,
1572:       MATHEMATICAL_OPERATORS,
1573:       MISCELLANEOUS_TECHNICAL,
1574:       CONTROL_PICTURES,
1575:       OPTICAL_CHARACTER_RECOGNITION,
1576:       ENCLOSED_ALPHANUMERICS,
1577:       BOX_DRAWING,
1578:       BLOCK_ELEMENTS,
1579:       GEOMETRIC_SHAPES,
1580:       MISCELLANEOUS_SYMBOLS,
1581:       DINGBATS,
1582:       MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1583:       SUPPLEMENTAL_ARROWS_A,
1584:       BRAILLE_PATTERNS,
1585:       SUPPLEMENTAL_ARROWS_B,
1586:       MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1587:       SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1588:       MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1589:       CJK_RADICALS_SUPPLEMENT,
1590:       KANGXI_RADICALS,
1591:       IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1592:       CJK_SYMBOLS_AND_PUNCTUATION,
1593:       HIRAGANA,
1594:       KATAKANA,
1595:       BOPOMOFO,
1596:       HANGUL_COMPATIBILITY_JAMO,
1597:       KANBUN,
1598:       BOPOMOFO_EXTENDED,
1599:       KATAKANA_PHONETIC_EXTENSIONS,
1600:       ENCLOSED_CJK_LETTERS_AND_MONTHS,
1601:       CJK_COMPATIBILITY,
1602:       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1603:       YIJING_HEXAGRAM_SYMBOLS,
1604:       CJK_UNIFIED_IDEOGRAPHS,
1605:       YI_SYLLABLES,
1606:       YI_RADICALS,
1607:       HANGUL_SYLLABLES,
1608:       HIGH_SURROGATES,
1609:       HIGH_PRIVATE_USE_SURROGATES,
1610:       LOW_SURROGATES,
1611:       PRIVATE_USE_AREA,
1612:       CJK_COMPATIBILITY_IDEOGRAPHS,
1613:       ALPHABETIC_PRESENTATION_FORMS,
1614:       ARABIC_PRESENTATION_FORMS_A,
1615:       VARIATION_SELECTORS,
1616:       COMBINING_HALF_MARKS,
1617:       CJK_COMPATIBILITY_FORMS,
1618:       SMALL_FORM_VARIANTS,
1619:       ARABIC_PRESENTATION_FORMS_B,
1620:       HALFWIDTH_AND_FULLWIDTH_FORMS,
1621:       SPECIALS,
1622:       LINEAR_B_SYLLABARY,
1623:       LINEAR_B_IDEOGRAMS,
1624:       AEGEAN_NUMBERS,
1625:       OLD_ITALIC,
1626:       GOTHIC,
1627:       UGARITIC,
1628:       DESERET,
1629:       SHAVIAN,
1630:       OSMANYA,
1631:       CYPRIOT_SYLLABARY,
1632:       BYZANTINE_MUSICAL_SYMBOLS,
1633:       MUSICAL_SYMBOLS,
1634:       TAI_XUAN_JING_SYMBOLS,
1635:       MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1636:       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1637:       CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1638:       TAGS,
1639:       VARIATION_SELECTORS_SUPPLEMENT,
1640:       SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1641:       SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1642:     };
1643:   } // class UnicodeBlock
1644: 
1645:   /**
1646:    * A class to encompass all the properties of characters in the 
1647:    * private use blocks in the Unicode standard.  This class extends
1648:    * UnassignedCharacters because the return type from getType() is 
1649:    * different.
1650:    * @author Anthony Balkissoon abalkiss at redhat dot com
1651:    *
1652:    */
1653:   private static class PrivateUseCharacters extends UnassignedCharacters
1654:   {
1655:     /**
1656:      * Returns the type of the character cp.
1657:      */
1658:     static int getType(int cp)
1659:     {
1660:       // The upper 2 code points in any plane are considered unassigned, 
1661:       // even in the private-use planes.
1662:       if ((cp & 0xffff) >= 0xfffe)
1663:         return UnassignedCharacters.getType(cp);
1664:       return PRIVATE_USE;
1665:     }
1666:     
1667:     /**
1668:      * Returns true if the character cp is defined.
1669:      */
1670:     static boolean isDefined(int cp)
1671:     {
1672:       // The upper 2 code points in any plane are considered unassigned, 
1673:       // even in the private-use planes.
1674:       if ((cp & 0xffff) >= 0xfffe)
1675:         return UnassignedCharacters.isDefined(cp);
1676:       return true;
1677:     }
1678:     
1679:     /**
1680:      * Gets the directionality for the character cp.
1681:      */
1682:     static byte getDirectionality(int cp)
1683:     {
1684:       if ((cp & 0xffff) >= 0xfffe)
1685:         return UnassignedCharacters.getDirectionality(cp);
1686:       return DIRECTIONALITY_LEFT_TO_RIGHT;
1687:     }
1688:   }
1689:   
1690:   /**
1691:    * A class to encompass all the properties of code points that are 
1692:    * currently undefined in the Unicode standard.
1693:    * @author Anthony Balkissoon abalkiss at redhat dot com
1694:    *
1695:    */
1696:   private static class UnassignedCharacters
1697:   {
1698:     /**
1699:      * Returns the numeric value for the unassigned characters.
1700:      * @param cp the character
1701:      * @param radix the radix (not used)
1702:      * @return the numeric value of this character in this radix
1703:      */
1704:     static int digit(int cp, int radix)
1705:     {
1706:       return -1;
1707:     }
1708: 
1709:     /**
1710:      * Returns the Unicode directionality property for unassigned 
1711:      * characters.
1712:      * @param cp the character
1713:      * @return DIRECTIONALITY_UNDEFINED
1714:      */
1715:     static byte getDirectionality(int cp)
1716:     {
1717:       return DIRECTIONALITY_UNDEFINED;
1718:     }
1719: 
1720:     /**
1721:      * Returns -1, the numeric value for unassigned Unicode characters.
1722:      * @param cp the character
1723:      * @return -1
1724:      */
1725:     static int getNumericValue(int cp)
1726:     {
1727:       return -1;
1728:     }
1729: 
1730:     /**
1731:      * Returns UNASSIGNED, the type of unassigned Unicode characters.
1732:      * @param cp the character
1733:      * @return UNASSIGNED
1734:      */
1735:     static int getType(int cp)
1736:     {
1737:       return UNASSIGNED;
1738:     }
1739:     
1740:     /**
1741:      * Returns false to indiciate that the character is not defined in the 
1742:      * Unicode standard.
1743:      * @param cp the character
1744:      * @return false
1745:      */
1746:     static boolean isDefined(int cp)
1747:     {
1748:       return false;
1749:     }
1750: 
1751:     /**
1752:      * Returns false to indicate that the character is not a digit.
1753:      * @param cp the character
1754:      * @return false
1755:      */
1756:     static boolean isDigit(int cp)
1757:     {
1758:       return false;
1759:     }
1760: 
1761:     /**
1762:      * Returns false to indicate that the character cannot be ignored 
1763:      * within an identifier
1764:      * @param cp the character
1765:      * @return false
1766:      */
1767:     static boolean isIdentifierIgnorable(int cp)
1768:     {
1769:       return false;
1770:     }
1771: 
1772:     /**
1773:      * Returns false to indicate that the character cannot be part of a 
1774:      * Java identifier.
1775:      * @param cp the character
1776:      * @return false
1777:      */
1778:     static boolean isJavaIdentifierPart(int cp)
1779:     {
1780:       return false;
1781:     }
1782: 
1783:     /**
1784:      * Returns false to indicate that the character cannot be start a 
1785:      * Java identifier.
1786:      * @param cp the character
1787:      * @return false
1788:      */
1789:     static boolean isJavaIdentiferStart(int cp)
1790:     {
1791:       return false;
1792:     }
1793: 
1794:     /**
1795:      * Returns false to indicate that the character is not a letter.
1796:      * @param cp the character
1797:      * @return false
1798:      */
1799:     static boolean isLetter(int cp)
1800:     {
1801:       return false;
1802:     }
1803: 
1804:     /**
1805:      * Returns false to indicate that the character cannot is neither a letter
1806:      * nor a digit.
1807:      * @param cp the character
1808:      * @return false
1809:      */
1810:     static boolean isLetterOrDigit(int cp)
1811:     {
1812:       return false;
1813:     }
1814: 
1815:     /**
1816:      * Returns false to indicate that the character is not a lowercase letter.
1817:      * @param cp the character
1818:      * @return false
1819:      */
1820:     static boolean isLowerCase(int cp)
1821:     {
1822:       return false;
1823:     }
1824:     
1825:     /**
1826:      * Returns false to indicate that the character cannot is not mirrored.
1827:      * @param cp the character
1828:      * @return false
1829:      */
1830:     static boolean isMirrored(int cp)
1831:     {
1832:       return false;
1833:     }
1834: 
1835:     /**
1836:      * Returns false to indicate that the character is not a space character.
1837:      * @param cp the character
1838:      * @return false
1839:      */
1840:     static boolean isSpaceChar(int cp)
1841:     {
1842:       return false;
1843:     }
1844:     
1845:     /**
1846:      * Returns false to indicate that the character it not a titlecase letter.
1847:      * @param cp the character
1848:      * @return false
1849:      */
1850:     static boolean isTitleCase(int cp)
1851:     {
1852:       return false;
1853:     }
1854:     
1855:     /**
1856:      * Unconditionally reports that the character cannot be part of a
1857:      * Unicode identifier.
1858:      * @param cp the code point to classify (ignored)
1859:      * @return always <code>false</code>
1860:      */
1861:     static boolean isUnicodeIdentifierPart(int cp)
1862:     {
1863:       return false;
1864:     }
1865: 
1866:     /**
1867:      * Unconditionally reports that the character cannot start a
1868:      * Unicode identifier.
1869:      * @param cp the code point to classify (ignored)
1870:      * @return always <code>false</code>
1871:      */
1872:     static boolean isUnicodeIdentifierStart(int cp)
1873:     {
1874:       return false;
1875:     }
1876: 
1877:     /**
1878:      * Unconditionally reports that the character is not an uppercase letter.
1879:      * @param cp the code point to classify (ignored)
1880:      * @return always <code>false</code>
1881:      */
1882:     static boolean isUpperCase(int cp)
1883:     {
1884:       return false;
1885:     }
1886: 
1887:     /**
1888:      * Unconditionally reports that the character is not a whitespace
1889:      * character.
1890:      * @param cp the code point to classify (ignored)
1891:      * @return always <code>false</code>
1892:      */
1893:     static boolean isWhiteSpace(int cp)
1894:     {
1895:       return false;
1896:     }
1897: 
1898:     /**
1899:      * Returns cp unchanged: this character has no lowercase conversion.
1900:      * @param cp the code point to convert
1901:      * @return cp itself, never a different code point
1902:      */
1903:     static int toLowerCase(int cp)
1904:     {
1905:       return cp;
1906:     }
1907:     
1908:     /**
1909:      * Returns cp unchanged: this character has no titlecase conversion.
1910:      * @param cp the code point to convert
1911:      * @return cp itself, never a different code point
1912:      */
1913:     static int toTitleCase(int cp)
1914:     {
1915:       return cp;
1916:     }
1917: 
1918:     /**
1919:      * Returns cp unchanged: this character has no uppercase conversion.
1920:      * @param cp the code point to convert
1921:      * @return cp itself, never a different code point
1922:      */
1923:     static int toUpperCase(int cp)
1924:     {
1925:       return cp;
1926:     }    
1927:   }
1928: 
1929:   /**
1930:    * The immutable char wrapped by this Character.
1931:    *
1932:    * @serial the value of this Character
1933:    */
1934:   private final char value;
1935: 
1936:   /**
1937:    * Serialization version identifier. Compatible with JDK 1.0+.
1938:    */
1939:   private static final long serialVersionUID = 3786198910865385080L;
1940: 
1941:   /**
1942:    * Smallest value allowed for radix arguments in Java. This value is 2.
1943:    *
1944:    * @see #digit(char, int)
1945:    * @see #forDigit(int, int)
1946:    * @see Integer#toString(int, int)
1947:    * @see Integer#valueOf(String)
1948:    */
1949:   public static final int MIN_RADIX = 2;
1950: 
1951:   /**
1952:    * Largest value allowed for radix arguments in Java. This value is 36.
1953:    *
1954:    * @see #digit(char, int)
1955:    * @see #forDigit(int, int)
1956:    * @see Integer#toString(int, int)
1957:    * @see Integer#valueOf(String)
1958:    */
1959:   public static final int MAX_RADIX = 36;
1960: 
1961:   /**
1962:    * The minimum value the char data type can hold.
1963:    * This value is <code>'\\u0000'</code>.
1964:    */
1965:   public static final char MIN_VALUE = '\u0000';
1966: 
1967:   /**
1968:    * The maximum value the char data type can hold.
1969:    * This value is <code>'\\uFFFF'</code>.
1970:    */
1971:   public static final char MAX_VALUE = '\uFFFF';
1972: 
1973:   /**
1974:    * The minimum Unicode 4.0 code point.  This value is <code>0</code>.
1975:    * @since 1.5
1976:    */
1977:   public static final int MIN_CODE_POINT = 0;
1978: 
1979:   /**
1980:    * The maximum Unicode 4.0 code point, which is greater than the range
1981:    * of the char data type.
1982:    * This value is <code>0x10FFFF</code>.
1983:    * @since 1.5
1984:    */
1985:   public static final int MAX_CODE_POINT = 0x10FFFF;
1986: 
1987:   /**
1988:    * The minimum Unicode high surrogate code unit, or
1989:    * <em>leading-surrogate</em>, in the UTF-16 character encoding.
1990:    * This value is <code>'\\uD800'</code>.
1991:    * @since 1.5
1992:    */
1993:   public static final char MIN_HIGH_SURROGATE = '\uD800';
1994: 
1995:   /**
1996:    * The maximum Unicode high surrogate code unit, or
1997:    * <em>leading-surrogate</em>, in the UTF-16 character encoding.
1998:    * This value is <code>'\\uDBFF'</code>.
1999:    * @since 1.5
2000:    */
2001:   public static final char MAX_HIGH_SURROGATE = '\uDBFF';
2002: 
2003:   /**
2004:    * The minimum Unicode low surrogate code unit, or
2005:    * <em>trailing-surrogate</em>, in the UTF-16 character encoding.
2006:    * This value is <code>'\\uDC00'</code>.
2007:    * @since 1.5
2008:    */
2009:   public static final char MIN_LOW_SURROGATE = '\uDC00';
2010: 
2011:   /**
2012:    * The maximum Unicode low surrogate code unit, or
2013:    * <em>trailing-surrogate</em>, in the UTF-16 character encoding.
2014:    * This value is <code>'\\uDFFF'</code>.
2015:    * @since 1.5
2016:    */
2017:   public static final char MAX_LOW_SURROGATE = '\uDFFF';  
2018: 
2019:   /**
2020:    * The minimum Unicode surrogate code unit in the UTF-16 character encoding.
2021:    * This value is <code>'\\uD800'</code>.
2022:    * @since 1.5
2023:    */
2024:   public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
2025: 
2026:   /**
2027:    * The maximum Unicode surrogate code unit in the UTF-16 character encoding.
2028:    * This value is <code>'\\uDFFF'</code>.
2029:    * @since 1.5
2030:    */
2031:   public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
2032: 
2033:   /**
2034:    * The lowest possible supplementary Unicode code point (the first code
2035:    * point outside the basic multilingual plane (BMP)): <code>0x10000</code>.
2036:    * @since 1.5
2037:    */ 
2038:   public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
2039: 
2040:   /**
2041:    * Class object representing the primitive char data type.
2042:    *
2043:    * @since 1.1
2044:    */
2045:   public static final Class<Character> TYPE = (Class<Character>) VMClassLoader.getPrimitiveClass('C');
2046: 
2047:   /**
2048:    * The number of bits needed to represent a <code>char</code>.
2049:    * @since 1.5
2050:    */
2051:   public static final int SIZE = 16;
2052: 
2053:   // This caches some Character values, and is used by boxing
2054:   // conversions via valueOf().  We must cache at least 0..127;
2055:   // this constant controls how much we actually cache.
2056:   private static final int MAX_CACHE = 127;
2057:   private static Character[] charCache = new Character[MAX_CACHE + 1];
2058: 
2059:   /**
2060:    * Lu = Letter, Uppercase (Informative).
2061:    * One of the general category codes that getType() results are compared to.
2062:    * @since 1.1
2063:    */
2064:   public static final byte UPPERCASE_LETTER = 1;
2065: 
2066:   /**
2067:    * Ll = Letter, Lowercase (Informative).
2068:    *
2069:    * @since 1.1
2070:    */
2071:   public static final byte LOWERCASE_LETTER = 2;
2072: 
2073:   /**
2074:    * Lt = Letter, Titlecase (Informative).
2075:    *
2076:    * @since 1.1
2077:    */
2078:   public static final byte TITLECASE_LETTER = 3;
2079: 
2080:   /**
2081:    * Mn = Mark, Non-Spacing (Normative).
2082:    *
2083:    * @since 1.1
2084:    */
2085:   public static final byte NON_SPACING_MARK = 6;
2086: 
2087:   /**
2088:    * Mc = Mark, Spacing Combining (Normative).
2089:    *
2090:    * @since 1.1
2091:    */
2092:   public static final byte COMBINING_SPACING_MARK = 8;
2093: 
2094:   /**
2095:    * Me = Mark, Enclosing (Normative).
2096:    *
2097:    * @since 1.1
2098:    */
2099:   public static final byte ENCLOSING_MARK = 7;
2100: 
2101:   /**
2102:    * Nd = Number, Decimal Digit (Normative).
2103:    *
2104:    * @since 1.1
2105:    */
2106:   public static final byte DECIMAL_DIGIT_NUMBER = 9;
2107: 
2108:   /**
2109:    * Nl = Number, Letter (Normative).
2110:    *
2111:    * @since 1.1
2112:    */
2113:   public static final byte LETTER_NUMBER = 10;
2114: 
2115:   /**
2116:    * No = Number, Other (Normative).
2117:    *
2118:    * @since 1.1
2119:    */
2120:   public static final byte OTHER_NUMBER = 11;
2121: 
2122:   /**
2123:    * Zs = Separator, Space (Normative).
2124:    *
2125:    * @since 1.1
2126:    */
2127:   public static final byte SPACE_SEPARATOR = 12;
2128: 
2129:   /**
2130:    * Zl = Separator, Line (Normative).
2131:    *
2132:    * @since 1.1
2133:    */
2134:   public static final byte LINE_SEPARATOR = 13;
2135: 
2136:   /**
2137:    * Zp = Separator, Paragraph (Normative).
2138:    *
2139:    * @since 1.1
2140:    */
2141:   public static final byte PARAGRAPH_SEPARATOR = 14;
2142: 
2143:   /**
2144:    * Cc = Other, Control (Normative).
2145:    *
2146:    * @since 1.1
2147:    */
2148:   public static final byte CONTROL = 15;
2149: 
2150:   /**
2151:    * Cf = Other, Format (Normative).
2152:    *
2153:    * @since 1.1
2154:    */
2155:   public static final byte FORMAT = 16;
2156: 
2157:   /**
2158:    * Cs = Other, Surrogate (Normative).
2159:    *
2160:    * @since 1.1
2161:    */
2162:   public static final byte SURROGATE = 19;
2163: 
2164:   /**
2165:    * Co = Other, Private Use (Normative).
2166:    *
2167:    * @since 1.1
2168:    */
2169:   public static final byte PRIVATE_USE = 18;
2170: 
2171:   /**
2172:    * Cn = Other, Not Assigned (Normative).
2173:    *
2174:    * @since 1.1
2175:    */
2176:   public static final byte UNASSIGNED = 0;
2177: 
2178:   /**
2179:    * Lm = Letter, Modifier (Informative).
2180:    *
2181:    * @since 1.1
2182:    */
2183:   public static final byte MODIFIER_LETTER = 4;
2184: 
2185:   /**
2186:    * Lo = Letter, Other (Informative).
2187:    *
2188:    * @since 1.1
2189:    */
2190:   public static final byte OTHER_LETTER = 5;
2191: 
2192:   /**
2193:    * Pc = Punctuation, Connector (Informative).
2194:    *
2195:    * @since 1.1
2196:    */
2197:   public static final byte CONNECTOR_PUNCTUATION = 23;
2198: 
2199:   /**
2200:    * Pd = Punctuation, Dash (Informative).
2201:    *
2202:    * @since 1.1
2203:    */
2204:   public static final byte DASH_PUNCTUATION = 20;
2205: 
2206:   /**
2207:    * Ps = Punctuation, Open (Informative).
2208:    *
2209:    * @since 1.1
2210:    */
2211:   public static final byte START_PUNCTUATION = 21;
2212: 
2213:   /**
2214:    * Pe = Punctuation, Close (Informative).
2215:    *
2216:    * @since 1.1
2217:    */
2218:   public static final byte END_PUNCTUATION = 22;
2219: 
2220:   /**
2221:    * Pi = Punctuation, Initial Quote (Informative).
2222:    *
2223:    * @since 1.4
2224:    */
2225:   public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
2226: 
2227:   /**
2228:    * Pf = Punctuation, Final Quote (Informative).
2229:    *
2230:    * @since 1.4
2231:    */
2232:   public static final byte FINAL_QUOTE_PUNCTUATION = 30;
2233: 
2234:   /**
2235:    * Po = Punctuation, Other (Informative).
2236:    *
2237:    * @since 1.1
2238:    */
2239:   public static final byte OTHER_PUNCTUATION = 24;
2240: 
2241:   /**
2242:    * Sm = Symbol, Math (Informative).
2243:    *
2244:    * @since 1.1
2245:    */
2246:   public static final byte MATH_SYMBOL = 25;
2247: 
2248:   /**
2249:    * Sc = Symbol, Currency (Informative).
2250:    *
2251:    * @since 1.1
2252:    */
2253:   public static final byte CURRENCY_SYMBOL = 26;
2254: 
2255:   /**
2256:    * Sk = Symbol, Modifier (Informative).
2257:    *
2258:    * @since 1.1
2259:    */
2260:   public static final byte MODIFIER_SYMBOL = 27;
2261: 
2262:   /**
2263:    * So = Symbol, Other (Informative).
2264:    *
2265:    * @since 1.1
2266:    */
2267:   public static final byte OTHER_SYMBOL = 28;
2268: 
2269:   /**
2270:    * Undefined bidirectional character type. Undefined char values have
2271:    * undefined directionality in the Unicode specification.
2272:    *
2273:    * @since 1.4
2274:    */
2275:   public static final byte DIRECTIONALITY_UNDEFINED = -1;
2276: 
2277:   /**
2278:    * Strong bidirectional character type "L".
2279:    *
2280:    * @since 1.4
2281:    */
2282:   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
2283: 
2284:   /**
2285:    * Strong bidirectional character type "R".
2286:    *
2287:    * @since 1.4
2288:    */
2289:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
2290: 
2291:   /**
2292:    * Strong bidirectional character type "AL".
2293:    *
2294:    * @since 1.4
2295:    */
2296:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
2297: 
2298:   /**
2299:    * Weak bidirectional character type "EN".
2300:    *
2301:    * @since 1.4
2302:    */
2303:   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
2304: 
2305:   /**
2306:    * Weak bidirectional character type "ES".
2307:    *
2308:    * @since 1.4
2309:    */
2310:   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
2311: 
2312:   /**
2313:    * Weak bidirectional character type "ET".
2314:    *
2315:    * @since 1.4
2316:    */
2317:   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
2318: 
2319:   /**
2320:    * Weak bidirectional character type "AN".
2321:    *
2322:    * @since 1.4
2323:    */
2324:   public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
2325: 
2326:   /**
2327:    * Weak bidirectional character type "CS".
2328:    *
2329:    * @since 1.4
2330:    */
2331:   public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
2332: 
2333:   /**
2334:    * Weak bidirectional character type "NSM".
2335:    *
2336:    * @since 1.4
2337:    */
2338:   public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
2339: 
2340:   /**
2341:    * Weak bidirectional character type "BN".
2342:    *
2343:    * @since 1.4
2344:    */
2345:   public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
2346: 
2347:   /**
2348:    * Neutral bidirectional character type "B".
2349:    *
2350:    * @since 1.4
2351:    */
2352:   public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
2353: 
2354:   /**
2355:    * Neutral bidirectional character type "S".
2356:    *
2357:    * @since 1.4
2358:    */
2359:   public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
2360: 
2361:   /**
2362:    * Neutral bidirectional character type "WS".
2363:    *
2364:    * @since 1.4
2365:    */
2366:   public static final byte DIRECTIONALITY_WHITESPACE = 12;
2367: 
2368:   /**
2369:    * Neutral bidirectional character type "ON".
2370:    *
2371:    * @since 1.4
2372:    */
2373:   public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
2374: 
2375:   /**
2376:    * Strong bidirectional character type "LRE".
2377:    *
2378:    * @since 1.4
2379:    */
2380:   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
2381: 
2382:   /**
2383:    * Strong bidirectional character type "LRO".
2384:    *
2385:    * @since 1.4
2386:    */
2387:   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
2388: 
2389:   /**
2390:    * Strong bidirectional character type "RLE".
2391:    *
2392:    * @since 1.4
2393:    */
2394:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
2395: 
2396:   /**
2397:    * Strong bidirectional character type "RLO".
2398:    *
2399:    * @since 1.4
2400:    */
2401:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
2402: 
2403:   /**
2404:    * Weak bidirectional character type "PDF".
2405:    *
2406:    * @since 1.4
2407:    */
2408:   public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
2409: 
2410:   /**
2411:    * Stores unicode block offset lookup tables, indexed by plane (see
2412:    * readCodePoint). Exploits package visibility of String.value: no copying.
2413:    * @see #readCodePoint(int)
2414:    * @see CharData#BLOCKS
2415:    */
2416:   private static final char[][] blocks = 
2417:     new char[][]{
2418:                  String.zeroBasedStringValue(CharData.BLOCKS[0]),
2419:                  String.zeroBasedStringValue(CharData.BLOCKS[1]),
2420:                  String.zeroBasedStringValue(CharData.BLOCKS[2]),
2421:                  String.zeroBasedStringValue(CharData.BLOCKS[3]),
2422:                  String.zeroBasedStringValue(CharData.BLOCKS[4]),
2423:                  String.zeroBasedStringValue(CharData.BLOCKS[5]),
2424:                  String.zeroBasedStringValue(CharData.BLOCKS[6]),
2425:                  String.zeroBasedStringValue(CharData.BLOCKS[7]),
2426:                  String.zeroBasedStringValue(CharData.BLOCKS[8]),
2427:                  String.zeroBasedStringValue(CharData.BLOCKS[9]),
2428:                  String.zeroBasedStringValue(CharData.BLOCKS[10]),
2429:                  String.zeroBasedStringValue(CharData.BLOCKS[11]),
2430:                  String.zeroBasedStringValue(CharData.BLOCKS[12]),
2431:                  String.zeroBasedStringValue(CharData.BLOCKS[13]),
2432:                  String.zeroBasedStringValue(CharData.BLOCKS[14]),
2433:                  String.zeroBasedStringValue(CharData.BLOCKS[15]),
2434:                  String.zeroBasedStringValue(CharData.BLOCKS[16])};
2435: 
2436:   /**
2437:    * Stores unicode attribute offset lookup tables, indexed by plane (see
2438:    * readCodePoint). Exploits package visibility of String.value: no copying.
2439:    * @see CharData#DATA
2440:    */
2441:   private static final char[][] data = 
2442:     new char[][]{
2443:                  String.zeroBasedStringValue(CharData.DATA[0]),
2444:                  String.zeroBasedStringValue(CharData.DATA[1]),
2445:                  String.zeroBasedStringValue(CharData.DATA[2]),
2446:                  String.zeroBasedStringValue(CharData.DATA[3]),
2447:                  String.zeroBasedStringValue(CharData.DATA[4]),
2448:                  String.zeroBasedStringValue(CharData.DATA[5]),
2449:                  String.zeroBasedStringValue(CharData.DATA[6]),
2450:                  String.zeroBasedStringValue(CharData.DATA[7]),
2451:                  String.zeroBasedStringValue(CharData.DATA[8]),
2452:                  String.zeroBasedStringValue(CharData.DATA[9]),
2453:                  String.zeroBasedStringValue(CharData.DATA[10]),
2454:                  String.zeroBasedStringValue(CharData.DATA[11]),
2455:                  String.zeroBasedStringValue(CharData.DATA[12]),
2456:                  String.zeroBasedStringValue(CharData.DATA[13]),
2457:                  String.zeroBasedStringValue(CharData.DATA[14]),
2458:                  String.zeroBasedStringValue(CharData.DATA[15]),
2459:                  String.zeroBasedStringValue(CharData.DATA[16])};
2460: 
2461:   /**
2462:    * Stores unicode numeric value attribute tables, one per NUM_VALUE entry.
2463:    * Exploits package visibility of String.value to avoid copying the array.
2464:    * @see CharData#NUM_VALUE
2465:    */
2466:   private static final char[][] numValue = 
2467:     new char[][]{
2468:                  String.zeroBasedStringValue(CharData.NUM_VALUE[0]),
2469:                  String.zeroBasedStringValue(CharData.NUM_VALUE[1]),
2470:                  String.zeroBasedStringValue(CharData.NUM_VALUE[2]),
2471:                  String.zeroBasedStringValue(CharData.NUM_VALUE[3]),
2472:                  String.zeroBasedStringValue(CharData.NUM_VALUE[4]),
2473:                  String.zeroBasedStringValue(CharData.NUM_VALUE[5]),
2474:                  String.zeroBasedStringValue(CharData.NUM_VALUE[6]),
2475:                  String.zeroBasedStringValue(CharData.NUM_VALUE[7]),
2476:                  String.zeroBasedStringValue(CharData.NUM_VALUE[8]),
2477:                  String.zeroBasedStringValue(CharData.NUM_VALUE[9]),
2478:                  String.zeroBasedStringValue(CharData.NUM_VALUE[10]),
2479:                  String.zeroBasedStringValue(CharData.NUM_VALUE[11]),
2480:                  String.zeroBasedStringValue(CharData.NUM_VALUE[12]),
2481:                  String.zeroBasedStringValue(CharData.NUM_VALUE[13]),
2482:                  String.zeroBasedStringValue(CharData.NUM_VALUE[14]),
2483:                  String.zeroBasedStringValue(CharData.NUM_VALUE[15]),
2484:                  String.zeroBasedStringValue(CharData.NUM_VALUE[16])};
2485: 
2486:   /**
2487:    * Stores unicode uppercase attribute tables, one per UPPER entry.
2488:    * Exploits package visibility of String.value to avoid copying the array.
2489:    * @see CharData#UPPER
2490:    */  
2491:   private static final char[][] upper = 
2492:     new char[][]{
2493:                  String.zeroBasedStringValue(CharData.UPPER[0]),
2494:                  String.zeroBasedStringValue(CharData.UPPER[1]),
2495:                  String.zeroBasedStringValue(CharData.UPPER[2]),
2496:                  String.zeroBasedStringValue(CharData.UPPER[3]),
2497:                  String.zeroBasedStringValue(CharData.UPPER[4]),
2498:                  String.zeroBasedStringValue(CharData.UPPER[5]),
2499:                  String.zeroBasedStringValue(CharData.UPPER[6]),
2500:                  String.zeroBasedStringValue(CharData.UPPER[7]),
2501:                  String.zeroBasedStringValue(CharData.UPPER[8]),
2502:                  String.zeroBasedStringValue(CharData.UPPER[9]),
2503:                  String.zeroBasedStringValue(CharData.UPPER[10]),
2504:                  String.zeroBasedStringValue(CharData.UPPER[11]),
2505:                  String.zeroBasedStringValue(CharData.UPPER[12]),
2506:                  String.zeroBasedStringValue(CharData.UPPER[13]),
2507:                  String.zeroBasedStringValue(CharData.UPPER[14]),
2508:                  String.zeroBasedStringValue(CharData.UPPER[15]),
2509:                  String.zeroBasedStringValue(CharData.UPPER[16])};
2510: 
2511:   /**
2512:    * Stores unicode lowercase attribute tables, one per LOWER entry.
2513:    * Exploits package visibility of String.value to avoid copying the array.
2514:    * @see CharData#LOWER
2515:    */
2516:   private static final char[][] lower = 
2517:     new char[][]{
2518:                  String.zeroBasedStringValue(CharData.LOWER[0]),
2519:                  String.zeroBasedStringValue(CharData.LOWER[1]),
2520:                  String.zeroBasedStringValue(CharData.LOWER[2]),
2521:                  String.zeroBasedStringValue(CharData.LOWER[3]),
2522:                  String.zeroBasedStringValue(CharData.LOWER[4]),
2523:                  String.zeroBasedStringValue(CharData.LOWER[5]),
2524:                  String.zeroBasedStringValue(CharData.LOWER[6]),
2525:                  String.zeroBasedStringValue(CharData.LOWER[7]),
2526:                  String.zeroBasedStringValue(CharData.LOWER[8]),
2527:                  String.zeroBasedStringValue(CharData.LOWER[9]),
2528:                  String.zeroBasedStringValue(CharData.LOWER[10]),
2529:                  String.zeroBasedStringValue(CharData.LOWER[11]),
2530:                  String.zeroBasedStringValue(CharData.LOWER[12]),
2531:                  String.zeroBasedStringValue(CharData.LOWER[13]),
2532:                  String.zeroBasedStringValue(CharData.LOWER[14]),
2533:                  String.zeroBasedStringValue(CharData.LOWER[15]),
2534:                  String.zeroBasedStringValue(CharData.LOWER[16])};
2535: 
2536:   /**
2537:    * Stores unicode direction attribute tables, one per DIRECTION entry.
2538:    * Exploits package visibility of String.value to avoid copying the array.
2539:    * @see CharData#DIRECTION
2540:    */
2541:   // Package visible for use by String.
2542:   static final char[][] direction = 
2543:     new char[][]{
2544:                  String.zeroBasedStringValue(CharData.DIRECTION[0]),
2545:                  String.zeroBasedStringValue(CharData.DIRECTION[1]),
2546:                  String.zeroBasedStringValue(CharData.DIRECTION[2]),
2547:                  String.zeroBasedStringValue(CharData.DIRECTION[3]),
2548:                  String.zeroBasedStringValue(CharData.DIRECTION[4]),
2549:                  String.zeroBasedStringValue(CharData.DIRECTION[5]),
2550:                  String.zeroBasedStringValue(CharData.DIRECTION[6]),
2551:                  String.zeroBasedStringValue(CharData.DIRECTION[7]),
2552:                  String.zeroBasedStringValue(CharData.DIRECTION[8]),
2553:                  String.zeroBasedStringValue(CharData.DIRECTION[9]),
2554:                  String.zeroBasedStringValue(CharData.DIRECTION[10]),
2555:                  String.zeroBasedStringValue(CharData.DIRECTION[11]),
2556:                  String.zeroBasedStringValue(CharData.DIRECTION[12]),
2557:                  String.zeroBasedStringValue(CharData.DIRECTION[13]),
2558:                  String.zeroBasedStringValue(CharData.DIRECTION[14]),
2559:                  String.zeroBasedStringValue(CharData.DIRECTION[15]),
2560:                  String.zeroBasedStringValue(CharData.DIRECTION[16])};
2561: 
2562:   /**
2563:    * Stores the unicode titlecase table (a single array). Exploits package
2564:    * visibility of String.value to avoid copying the array.
2565:    * @see CharData#TITLE
2566:    */
2567:   private static final char[] title = String.zeroBasedStringValue(CharData.TITLE);  
2568: 
2569:   /**
2570:    * Mask for grabbing the type (the low 5 bits) out of the contents of data.
2571:    * @see CharData#DATA
2572:    */
2573:   private static final int TYPE_MASK = 0x1F;
2574: 
2575:   /**
2576:    * Mask for grabbing the non-breaking space flag (bit 5) out of the
2577:    * contents of data.
2578:    * @see CharData#DATA
2579:    */
2580:   private static final int NO_BREAK_MASK = 0x20;
2581: 
2582:   /**
2583:    * Mask for grabbing the mirrored directionality flag (bit 6) out of the
2584:    * contents of data.
2585:    * @see CharData#DATA
2586:    */
2587:   private static final int MIRROR_MASK = 0x40;
2588: 
2589:   /**
2590:    * Grabs an attribute offset from the Unicode attribute database. The lower
2591:    * 5 bits are the character type, the next 2 bits are flags, and the top
2592:    * 9 bits are the offset into the attribute tables.
2593:    *
2594:    * @param codePoint the character to look up
2595:    * @return the character's attribute offset and type
2596:    * @see #TYPE_MASK
2597:    * @see #NO_BREAK_MASK
2598:    * @see #MIRROR_MASK
2599:    * @see CharData#DATA
2600:    * @see CharData#SHIFT
2601:    */
2602:   // Package visible for use in String.
2603:   static char readCodePoint(int codePoint)
2604:   {
2605:     int plane = codePoint >>> 16;
2606:     char offset = (char) (codePoint & 0xffff);
2607:     return data[plane][(char) (blocks[plane][offset >> CharData.SHIFT[plane]] + offset)];
2608:   }
2609: 
2610:   /**
2611:    * Wraps up a character. The wrapped value is stored in a final field.
2612:    *
2613:    * @param value the character to wrap
2614:    */
2615:   public Character(char value)
2616:   {
2617:     this.value = value;
2618:   }
2619: 
2620:   /**
2621:    * Returns the character which has been wrapped by this instance.
2622:    *
2623:    * @return the character wrapped
2624:    */
2625:   public char charValue()
2626:   {
2627:     return value;
2628:   }
2629: 
2630:   /**
2631:    * Returns the hash code of this Character, which is the numerical value
2632:    * (unsigned) of the wrapped character. Range: 0x0000-0xFFFF.
2633:    *
2634:    * @return the value of the wrapped character
2635:    */
2636:   public int hashCode()
2637:   {
2638:     return value;
2639:   }
2640: 
2641:   /**
2642:    * Compares this object with another one. The two are equal only when the
2643:    * other object is also a Character and wraps the same char value.
2644:    * @param o object to compare
2645:    * @return true if o is a Character with the same value
2646:    */
2647:   public boolean equals(Object o)
2648:   {
2649:     final boolean isCharacter = o instanceof Character;
2650:     return isCharacter && ((Character) o).value == value;
2651:   }
2652: 
2653:   /**
2654:    * Produces the String form of the wrapped character.
2655:    * @return a one-character String holding the char wrapped by this
2656:    *         instance
2657:    */
2658:   public String toString()
2659:   {
2660:     final char[] single = { value };
2661:     // Hand the array to the package constructor, which avoids a copy.
2662:     return new String(single, 0, 1, true);
2663:   }
2664: 
2665:   /**
2666:    * Builds a String of length 1 that holds the specified character.
2667:    * @param ch the character to convert
2668:    * @return a String containing the character
2669:    * @since 1.4
2670:    */
2671:   public static String toString(char ch)
2672:   {
2673:     final char[] single = { ch };
2674:     // Hand the array to the package constructor, which avoids a copy.
2675:     return new String(single, 0, 1, true);
2676:   }
2677: 
2678:   /**
2679:    * Tests whether a character is a Unicode lowercase letter, such as
2680:    * <code>'a'</code>.  The char is widened and handed to isLowerCase(int),
2681:    * which answers true when getType() reports LOWERCASE_LETTER.
2682:    * <br>lowercase = [Ll]
2683:    *
2684:    * @param ch character to test
2685:    * @return true if ch is a Unicode lowercase letter, else false
2686:    * @see #isUpperCase(char)
2687:    * @see #isTitleCase(char)
2688:    * @see #toLowerCase(char)
2689:    * @see #getType(char)
2690:    */
2691:   public static boolean isLowerCase(char ch)
2692:   {
2693:     final int codePoint = ch;
2694:     return isLowerCase(codePoint);
2695:   }
2696:   
2697:   /**
2698:    * Tests whether a code point is a Unicode lowercase letter, such as
2699:    * <code>'a'</code>.  This holds exactly when getType() reports
2700:    * LOWERCASE_LETTER for it.
2701:    * <br>
2702:    * lowercase = [Ll]
2703:    *
2704:    * @param codePoint character to test
2705:    * @return true if codePoint is a Unicode lowercase letter, else false
2706:    * @see #isUpperCase(int)
2707:    * @see #isTitleCase(int)
2708:    * @see #toLowerCase(char)
2709:    * @see #getType(int)
2710:    * @since 1.5
2711:    */
2712:   public static boolean isLowerCase(int codePoint)
2713:   {
2714:     final int category = getType(codePoint);
2715:     return category == LOWERCASE_LETTER;
2716:   }
2717: 
2718:   /**
2719:    * Tests whether a character is a Unicode uppercase letter, such as
2720:    * <code>'A'</code>.  The char is widened and handed to isUpperCase(int),
2721:    * which answers true when getType() reports UPPERCASE_LETTER.
2722:    * <br>uppercase = [Lu]
2723:    *
2724:    * @param ch character to test
2725:    * @return true if ch is a Unicode uppercase letter, else false
2726:    * @see #isLowerCase(char)
2727:    * @see #isTitleCase(char)
2728:    * @see #toUpperCase(char)
2729:    * @see #getType(char)
2730:    */
2731:   public static boolean isUpperCase(char ch)
2732:   {
2733:     final int codePoint = ch;
2734:     return isUpperCase(codePoint);
2735:   }
2736:   
2737:   /**
2738:    * Tests whether a code point is a Unicode uppercase letter, such as
2739:    * <code>'A'</code>.  This holds exactly when getType() reports
2740:    * UPPERCASE_LETTER for it.
2741:    * <br>
2742:    * uppercase = [Lu]
2743:    *
2744:    * @param codePoint character to test
2745:    * @return true if codePoint is a Unicode uppercase letter, else false
2746:    * @see #isLowerCase(int)
2747:    * @see #isTitleCase(int)
2748:    * @see #toUpperCase(char)
2749:    * @see #getType(int)
2750:    * @since 1.5
2751:    */
2752:   public static boolean isUpperCase(int codePoint)
2753:   {
2754:     final int category = getType(codePoint);
2755:     return category == UPPERCASE_LETTER;
2756:   }
2757: 
2758:   /**
2759:    * Tests whether a character is a Unicode titlecase letter, such as "Lj"
2760:    * (Latin capital L with small letter j).  The char is widened and handed
2761:    * to isTitleCase(int), which tests getType() against TITLECASE_LETTER.
2762:    * <br>titlecase = [Lt]
2763:    *
2764:    * @param ch character to test
2765:    * @return true if ch is a Unicode titlecase letter, else false
2766:    * @see #isLowerCase(char)
2767:    * @see #isUpperCase(char)
2768:    * @see #toTitleCase(char)
2769:    * @see #getType(char)
2770:    */
2771:   public static boolean isTitleCase(char ch)
2772:   {
2773:     final int codePoint = ch;
2774:     return isTitleCase(codePoint);
2775:   }
2776: 
2777:   /**
2778:    * Tests whether a code point is a Unicode titlecase letter, such as "Lj"
2779:    * (Latin capital L with small letter j).  This holds exactly when
2780:    * getType() reports TITLECASE_LETTER for it.
2781:    * <br>
2782:    * titlecase = [Lt]
2783:    *
2784:    * @param codePoint character to test
2785:    * @return true if codePoint is a Unicode titlecase letter, else false
2786:    * @see #isLowerCase(int)
2787:    * @see #isUpperCase(int)
2788:    * @see #toTitleCase(char)
2789:    * @see #getType(int)
2790:    * @since 1.5
2791:    */
2792:   public static boolean isTitleCase(int codePoint)
2793:   {
2794:     final int category = getType(codePoint);
2795:     return category == TITLECASE_LETTER;
2796:   }
2797:   
2798: 
2799:   /**
2800:    * Determines if a character is a Unicode decimal digit. For example,
2801:    * <code>'0'</code> is a digit.  A character is a Unicode digit if
2802:    * getType() returns DECIMAL_DIGIT_NUMBER.
2803:    * <br>
2804:    * Unicode decimal digit = [Nd]
2805:    *
2806:    * @param ch character to test
2807:    * @return true if ch is a Unicode decimal digit, else false
2808:    * @see #digit(char, int)
2809:    * @see #forDigit(int, int)
2810:    * @see #getType(char)
2811:    */
2812:   public static boolean isDigit(char ch)
2813:   {
2814:     return isDigit((int)ch);
2815:   }
2816:   
2817:   /**
2818:    * Determines if a character is a Unicode decimal digit. For example,
2819:    * <code>'0'</code> is a digit. A character is a Unicode digit if
2820:    * getType() returns DECIMAL_DIGIT_NUMBER.
2821:    * <br>
2822:    * Unicode decimal digit = [Nd]
2823:    *
2824:    * @param codePoint character to test
2825:    * @return true if ch is a Unicode decimal digit, else false
2826:    * @see #digit(char, int)
2827:    * @see #forDigit(int, int)
2828:    * @see #getType(char)
2829:    * 
2830:    * @since 1.5
2831:    */
2832: 
2833:   public static boolean isDigit(int codePoint)
2834:   {
2835:     return getType(codePoint) == DECIMAL_DIGIT_NUMBER;
2836:   }
2837: 
2838:   /**
2839:    * Determines if a character is part of the Unicode Standard. This is an
2840:    * evolving standard, but covers every character in the data file.
2841:    * <br>
2842:    * defined = not [Cn]
2843:    *
2844:    * @param ch character to test
2845:    * @return true if ch is a Unicode character, else false
2846:    * @see #isDigit(char)
2847:    * @see #isLetter(char)
2848:    * @see #isLetterOrDigit(char)
2849:    * @see #isLowerCase(char)
2850:    * @see #isTitleCase(char)
2851:    * @see #isUpperCase(char)
2852:    */
2853:   public static boolean isDefined(char ch)
2854:   {
2855:     return isDefined((int)ch);
2856:   }
2857:   
2858:   /**
2859:    * Determines if a character is part of the Unicode Standard. This is an
2860:    * evolving standard, but covers every character in the data file.
2861:    * <br>
2862:    * defined = not [Cn]
2863:    *
2864:    * @param codePoint character to test
2865:    * @return true if ch is a Unicode character, else false
2866:    * @see #isDigit(char)
2867:    * @see #isLetter(char)
2868:    * @see #isLetterOrDigit(char)
2869:    * @see #isLowerCase(char)
2870:    * @see #isTitleCase(char)
2871:    * @see #isUpperCase(char)
2872:    * 
2873:    * @since 1.5
2874:    */
2875:   public static boolean isDefined(int codePoint)
2876:   {
2877:     return getType(codePoint) != UNASSIGNED;
2878:   }
2879: 
2880:   /**
2881:    * Determines if a character is a Unicode letter. Not all letters have case,
2882:    * so this may return true when isLowerCase and isUpperCase return false.
2883:    * A character is a Unicode letter if getType() returns one of 
2884:    * UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER,
2885:    * or OTHER_LETTER.
2886:    * <br>
2887:    * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
2888:    *
2889:    * @param ch character to test
2890:    * @return true if ch is a Unicode letter, else false
2891:    * @see #isDigit(char)
2892:    * @see #isJavaIdentifierStart(char)
2893:    * @see #isJavaLetter(char)
2894:    * @see #isJavaLetterOrDigit(char)
2895:    * @see #isLetterOrDigit(char)
2896:    * @see #isLowerCase(char)
2897:    * @see #isTitleCase(char)
2898:    * @see #isUnicodeIdentifierStart(char)
2899:    * @see #isUpperCase(char)
2900:    */
2901:   public static boolean isLetter(char ch)
2902:   {
2903:     return isLetter((int)ch);
2904:   }
2905:   
2906:   /**
2907:    * Determines if a character is a Unicode letter. Not all letters have case,
2908:    * so this may return true when isLowerCase and isUpperCase return false.
2909:    * A character is a Unicode letter if getType() returns one of 
2910:    * UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER,
2911:    * or OTHER_LETTER.
2912:    * <br>
2913:    * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
2914:    *
2915:    * @param codePoint character to test
2916:    * @return true if ch is a Unicode letter, else false
2917:    * @see #isDigit(char)
2918:    * @see #isJavaIdentifierStart(char)
2919:    * @see #isJavaLetter(char)
2920:    * @see #isJavaLetterOrDigit(char)
2921:    * @see #isLetterOrDigit(char)
2922:    * @see #isLowerCase(char)
2923:    * @see #isTitleCase(char)
2924:    * @see #isUnicodeIdentifierStart(char)
2925:    * @see #isUpperCase(char)
2926:    * 
2927:    * @since 1.5
2928:    */
2929:   public static boolean isLetter(int codePoint)
2930:   {
2931:     return ((1 << getType(codePoint))
2932:         & ((1 << UPPERCASE_LETTER)
2933:             | (1 << LOWERCASE_LETTER)
2934:             | (1 << TITLECASE_LETTER)
2935:             | (1 << MODIFIER_LETTER)
2936:             | (1 << OTHER_LETTER))) != 0;
2937:   }
2938:   /**
2939:    * Returns the index into the given CharSequence that is offset
2940:    * <code>codePointOffset</code> code points from <code>index</code>.
2941:    * @param seq the CharSequence
2942:    * @param index the start position in the CharSequence
2943:    * @param codePointOffset the number of code points offset from the start
2944:    * position
2945:    * @return the index into the CharSequence that is codePointOffset code 
2946:    * points offset from index
2947:    * 
2948:    * @throws NullPointerException if seq is null
2949:    * @throws IndexOutOfBoundsException if index is negative or greater than the
2950:    * length of the sequence.
2951:    * @throws IndexOutOfBoundsException if codePointOffset is positive and the 
2952:    * subsequence from index to the end of seq has fewer than codePointOffset
2953:    * code points
2954:    * @throws IndexOutOfBoundsException if codePointOffset is negative and the
2955:    * subsequence from the start of seq to index has fewer than 
2956:    * (-codePointOffset) code points
2957:    * @since 1.5
2958:    */
2959:   public static int offsetByCodePoints(CharSequence seq,
2960:                                        int index,
2961:                                        int codePointOffset)
2962:   {
2963:     int len = seq.length();
2964:     if (index < 0 || index > len)
2965:       throw new IndexOutOfBoundsException();
2966:     
2967:     int numToGo = codePointOffset;
2968:     int offset = index;
2969:     int adjust = 1;
2970:     if (numToGo >= 0)
2971:       {
2972:         for (; numToGo > 0; offset++)
2973:           {
2974:             numToGo--;
2975:             if (Character.isHighSurrogate(seq.charAt(offset))
2976:                 && (offset + 1) < len
2977:                 && Character.isLowSurrogate(seq.charAt(offset + 1)))
2978:               offset++;
2979:           }
2980:         return offset;
2981:       }
2982:     else
2983:       {
2984:         numToGo *= -1;
2985:         for (; numToGo > 0;)
2986:           {
2987:             numToGo--;
2988:             offset--;
2989:             if (Character.isLowSurrogate(seq.charAt(offset))
2990:                 && (offset - 1) >= 0
2991:                 && Character.isHighSurrogate(seq.charAt(offset - 1)))
2992:               offset--;
2993:           }
2994:         return offset;
2995:       }
2996:   }
2997:   
2998:   /**
2999:    * Returns the index into the given char subarray that is offset
3000:    * <code>codePointOffset</code> code points from <code>index</code>.
3001:    * @param a the char array
3002:    * @param start the start index of the subarray
3003:    * @param count the length of the subarray
3004:    * @param index the index to be offset
3005:    * @param codePointOffset the number of code points offset from <code>index
3006:    * </code>
3007:    * @return the index into the char array
3008:    * 
3009:    * @throws NullPointerException if a is null
3010:    * @throws IndexOutOfBoundsException if start or count is negative or if
3011:    * start + count is greater than the length of the array
3012:    * @throws IndexOutOfBoundsException if index is less than start or larger 
3013:    * than start + count
3014:    * @throws IndexOutOfBoundsException if codePointOffset is positive and the
3015:    * subarray from index to start + count - 1 has fewer than codePointOffset
3016:    * code points.
3017:    * @throws IndexOutOfBoundsException if codePointOffset is negative and the
3018:    * subarray from start to index - 1 has fewer than (-codePointOffset) code
3019:    * points
3020:    * 
3021:    * @since 1.5
3022:    */
3023:   public static int offsetByCodePoints(char[] a,
3024:                                        int start,
3025:                                        int count,
3026:                                        int index,
3027:                                        int codePointOffset)
3028:   {
3029:     int len = a.length;
3030:     int end = start + count;
3031:     if (start < 0 || count < 0 || end > len || index < start || index > end)
3032:       throw new IndexOutOfBoundsException();
3033:     
3034:     int numToGo = codePointOffset;
3035:     int offset = index;
3036:     int adjust = 1;
3037:     if (numToGo >= 0)
3038:       {
3039:         for (; numToGo > 0; offset++)
3040:           {
3041:             numToGo--;
3042:             if (Character.isHighSurrogate(a[offset])
3043:                 && (offset + 1) < len
3044:                 && Character.isLowSurrogate(a[offset + 1]))
3045:               offset++;
3046:           }
3047:         return offset;
3048:       }
3049:     else
3050:       {
3051:         numToGo *= -1;
3052:         for (; numToGo > 0;)
3053:           {
3054:             numToGo--;
3055:             offset--;
3056:             if (Character.isLowSurrogate(a[offset])
3057:                 && (offset - 1) >= 0
3058:                 && Character.isHighSurrogate(a[offset - 1]))
3059:               offset--;
3060:             if (offset < start)
3061:               throw new IndexOutOfBoundsException();
3062:           }
3063:         return offset;
3064:       }
3065: 
3066:   }
3067:   
3068:   /**
3069:    * Returns the number of Unicode code points in the specified range of the
3070:    * given CharSequence.  The first char in the range is at position
3071:    * beginIndex and the last one is at position endIndex - 1.  Paired 
3072:    * surrogates (supplementary characters are represented by a pair of chars - 
3073:    * one from the high surrogates and one from the low surrogates) 
3074:    * count as just one code point.
3075:    * @param seq the CharSequence to inspect
3076:    * @param beginIndex the beginning of the range
3077:    * @param endIndex the end of the range
3078:    * @return the number of Unicode code points in the given range of the 
3079:    * sequence
3080:    * @throws NullPointerException if seq is null
3081:    * @throws IndexOutOfBoundsException if beginIndex is negative, endIndex is
3082:    * larger than the length of seq, or if beginIndex is greater than endIndex.
3083:    * @since 1.5
3084:    */
3085:   public static int codePointCount(CharSequence seq, int beginIndex,
3086:                                    int endIndex)
3087:   {
3088:     int len = seq.length();
3089:     if (beginIndex < 0 || endIndex > len || beginIndex > endIndex)
3090:       throw new IndexOutOfBoundsException();
3091:         
3092:     int count = 0;
3093:     for (int i = beginIndex; i < endIndex; i++)
3094:       {
3095:         count++;
3096:         // If there is a pairing, count it only once.
3097:         if (isHighSurrogate(seq.charAt(i)) && (i + 1) < endIndex
3098:             && isLowSurrogate(seq.charAt(i + 1)))
3099:           i ++;
3100:       }    
3101:     return count;
3102:   }
3103:   
3104:   /**
3105:    * Returns the number of Unicode code points in the specified range of the
3106:    * given char array.  The first char in the range is at position
3107:    * offset and the length of the range is count.  Paired surrogates
3108:    * (supplementary characters are represented by a pair of chars - 
3109:    * one from the high surrogates and one from the low surrogates) 
3110:    * count as just one code point.
3111:    * @param a the char array to inspect
3112:    * @param offset the beginning of the range
3113:    * @param count the length of the range
3114:    * @return the number of Unicode code points in the given range of the 
3115:    * array
3116:    * @throws NullPointerException if a is null
3117:    * @throws IndexOutOfBoundsException if offset or count is negative or if 
3118:    * offset + countendIndex is larger than the length of a.
3119:    * @since 1.5
3120:    */
3121:   public static int codePointCount(char[] a, int offset,
3122:                                    int count)
3123:   {
3124:     int len = a.length;
3125:     int end = offset + count;
3126:     if (offset < 0 || count < 0 || end > len)
3127:       throw new IndexOutOfBoundsException();
3128:         
3129:     int counter = 0;
3130:     for (int i = offset; i < end; i++)
3131:       {
3132:         counter++;
3133:         // If there is a pairing, count it only once.
3134:         if (isHighSurrogate(a[i]) && (i + 1) < end
3135:             && isLowSurrogate(a[i + 1]))
3136:           i ++;
3137:       }    
3138:     return counter;
3139:   }
3140: 
3141:   /**
3142:    * Determines if a character is a Unicode letter or a Unicode digit. This
3143:    * is the combination of isLetter and isDigit.
3144:    * <br>
3145:    * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
3146:    *
3147:    * @param ch character to test
3148:    * @return true if ch is a Unicode letter or a Unicode digit, else false
3149:    * @see #isDigit(char)
3150:    * @see #isJavaIdentifierPart(char)
3151:    * @see #isJavaLetter(char)
3152:    * @see #isJavaLetterOrDigit(char)
3153:    * @see #isLetter(char)
3154:    * @see #isUnicodeIdentifierPart(char)
3155:    */
3156:   public static boolean isLetterOrDigit(char ch)
3157:   {
3158:     return isLetterOrDigit((int)ch);
3159:   }
3160: 
3161:   /**
3162:    * Determines if a character is a Unicode letter or a Unicode digit. This
3163:    * is the combination of isLetter and isDigit.
3164:    * <br>
3165:    * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
3166:    *
3167:    * @param codePoint character to test
3168:    * @return true if ch is a Unicode letter or a Unicode digit, else false
3169:    * @see #isDigit(char)
3170:    * @see #isJavaIdentifierPart(char)
3171:    * @see #isJavaLetter(char)
3172:    * @see #isJavaLetterOrDigit(char)
3173:    * @see #isLetter(char)
3174:    * @see #isUnicodeIdentifierPart(char)
3175:    * 
3176:    * @since 1.5
3177:    */
3178:   public static boolean isLetterOrDigit(int codePoint)
3179:   {
3180:     return ((1 << getType(codePoint))
3181:         & ((1 << UPPERCASE_LETTER)
3182:            | (1 << LOWERCASE_LETTER)
3183:            | (1 << TITLECASE_LETTER)
3184:            | (1 << MODIFIER_LETTER)
3185:            | (1 << OTHER_LETTER)
3186:            | (1 << DECIMAL_DIGIT_NUMBER))) != 0;
3187:   }
3188:   
3189:   /**
3190:    * Determines if a character can start a Java identifier. This is the
3191:    * combination of isLetter, any character where getType returns
3192:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
3193:    * (like '_').
3194:    *
3195:    * @param ch character to test
3196:    * @return true if ch can start a Java identifier, else false
3197:    * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}
3198:    * @see #isJavaLetterOrDigit(char)
3199:    * @see #isJavaIdentifierStart(char)
3200:    * @see #isJavaIdentifierPart(char)
3201:    * @see #isLetter(char)
3202:    * @see #isLetterOrDigit(char)
3203:    * @see #isUnicodeIdentifierStart(char)
3204:    */
3205:   public static boolean isJavaLetter(char ch)
3206:   {
3207:     return isJavaIdentifierStart(ch);
3208:   }
3209: 
3210:   /**
3211:    * Determines if a character can follow the first letter in
3212:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
3213:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
3214:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
3215:    * or isIdentifierIgnorable.
3216:    *
3217:    * @param ch character to test
3218:    * @return true if ch can follow the first letter in a Java identifier
3219:    * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
3220:    * @see #isJavaLetter(char)
3221:    * @see #isJavaIdentifierStart(char)
3222:    * @see #isJavaIdentifierPart(char)
3223:    * @see #isLetter(char)
3224:    * @see #isLetterOrDigit(char)
3225:    * @see #isUnicodeIdentifierPart(char)
3226:    * @see #isIdentifierIgnorable(char)
3227:    */
3228:   public static boolean isJavaLetterOrDigit(char ch)
3229:   {
3230:     return isJavaIdentifierPart(ch);
3231:   }
3232: 
3233:   /**
3234:    * Determines if a character can start a Java identifier. This is the
3235:    * combination of isLetter, any character where getType returns
3236:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
3237:    * (like '_').
3238:    * <br>
3239:    * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
3240:    *
3241:    * @param ch character to test
3242:    * @return true if ch can start a Java identifier, else false
3243:    * @see #isJavaIdentifierPart(char)
3244:    * @see #isLetter(char)
3245:    * @see #isUnicodeIdentifierStart(char)
3246:    * @since 1.1
3247:    */
3248:   public static boolean isJavaIdentifierStart(char ch)
3249:   {
3250:     return isJavaIdentifierStart((int)ch);
3251:   }
3252: 
3253:   /**
3254:    * Determines if a character can start a Java identifier. This is the
3255:    * combination of isLetter, any character where getType returns
3256:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
3257:    * (like '_').
3258:    * <br>
3259:    * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
3260:    *
3261:    * @param codePoint character to test
3262:    * @return true if ch can start a Java identifier, else false
3263:    * @see #isJavaIdentifierPart(char)
3264:    * @see #isLetter(char)
3265:    * @see #isUnicodeIdentifierStart(char)
3266:    * @since 1.5
3267:    */
3268:   public static boolean isJavaIdentifierStart(int codePoint)
3269:   {
3270:     return ((1 << getType(codePoint))
3271:             & ((1 << UPPERCASE_LETTER)
3272:                | (1 << LOWERCASE_LETTER)
3273:                | (1 << TITLECASE_LETTER)
3274:                | (1 << MODIFIER_LETTER)
3275:                | (1 << OTHER_LETTER)
3276:                | (1 << LETTER_NUMBER)
3277:                | (1 << CURRENCY_SYMBOL)
3278:                | (1 << CONNECTOR_PUNCTUATION))) != 0;
3279:   }
3280: 
3281:   /**
3282:    * Determines if a character can follow the first letter in
3283:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
3284:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
3285:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
3286:    * or isIdentifierIgnorable.
3287:    * <br>
3288:    * Java identifier extender =
3289:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
3290:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3291:    *
3292:    * @param ch character to test
3293:    * @return true if ch can follow the first letter in a Java identifier
3294:    * @see #isIdentifierIgnorable(char)
3295:    * @see #isJavaIdentifierStart(char)
3296:    * @see #isLetterOrDigit(char)
3297:    * @see #isUnicodeIdentifierPart(char)
3298:    * @since 1.1
3299:    */
3300:   public static boolean isJavaIdentifierPart(char ch)
3301:   {
3302:     return isJavaIdentifierPart((int)ch);
3303:   }
3304:   
3305:   /**
3306:    * Determines if a character can follow the first letter in
3307:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
3308:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
3309:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
3310:    * or isIdentifierIgnorable.
3311:    * <br>
3312:    * Java identifier extender =
3313:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
3314:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3315:    *
3316:    * @param codePoint character to test
3317:    * @return true if ch can follow the first letter in a Java identifier
3318:    * @see #isIdentifierIgnorable(char)
3319:    * @see #isJavaIdentifierStart(char)
3320:    * @see #isLetterOrDigit(char)
3321:    * @see #isUnicodeIdentifierPart(char)
3322:    * @since 1.5
3323:    */
3324:   public static boolean isJavaIdentifierPart(int codePoint)
3325:   {
3326:     int category = getType(codePoint);
3327:     return ((1 << category)
3328:             & ((1 << UPPERCASE_LETTER)
3329:                | (1 << LOWERCASE_LETTER)
3330:                | (1 << TITLECASE_LETTER)
3331:                | (1 << MODIFIER_LETTER)
3332:                | (1 << OTHER_LETTER)
3333:                | (1 << NON_SPACING_MARK)
3334:                | (1 << COMBINING_SPACING_MARK)
3335:                | (1 << DECIMAL_DIGIT_NUMBER)
3336:                | (1 << LETTER_NUMBER)
3337:                | (1 << CURRENCY_SYMBOL)
3338:                | (1 << CONNECTOR_PUNCTUATION)
3339:                | (1 << FORMAT))) != 0
3340:       || (category == CONTROL && isIdentifierIgnorable(codePoint));
3341:   }
3342: 
3343:   /**
3344:    * Determines if a character can start a Unicode identifier.  Only
3345:    * letters can start a Unicode identifier, but this includes characters
3346:    * in LETTER_NUMBER.
3347:    * <br>
3348:    * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
3349:    *
3350:    * @param ch character to test
3351:    * @return true if ch can start a Unicode identifier, else false
3352:    * @see #isJavaIdentifierStart(char)
3353:    * @see #isLetter(char)
3354:    * @see #isUnicodeIdentifierPart(char)
3355:    * @since 1.1
3356:    */
3357:   public static boolean isUnicodeIdentifierStart(char ch)
3358:   {
3359:     return isUnicodeIdentifierStart((int)ch);
3360:   }
3361: 
3362:   /**
3363:    * Determines if a character can start a Unicode identifier.  Only
3364:    * letters can start a Unicode identifier, but this includes characters
3365:    * in LETTER_NUMBER.
3366:    * <br>
3367:    * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
3368:    *
3369:    * @param codePoint character to test
3370:    * @return true if ch can start a Unicode identifier, else false
3371:    * @see #isJavaIdentifierStart(char)
3372:    * @see #isLetter(char)
3373:    * @see #isUnicodeIdentifierPart(char)
3374:    * @since 1.5
3375:    */
3376:   public static boolean isUnicodeIdentifierStart(int codePoint)
3377:   {
3378:     return ((1 << getType(codePoint))
3379:             & ((1 << UPPERCASE_LETTER)
3380:                | (1 << LOWERCASE_LETTER)
3381:                | (1 << TITLECASE_LETTER)
3382:                | (1 << MODIFIER_LETTER)
3383:                | (1 << OTHER_LETTER)
3384:                | (1 << LETTER_NUMBER))) != 0;
3385:   }
3386: 
3387:   /**
3388:    * Determines if a character can follow the first letter in
3389:    * a Unicode identifier. This includes letters, connecting punctuation,
3390:    * digits, numeric letters, combining marks, non-spacing marks, and
3391:    * isIdentifierIgnorable.
3392:    * <br>
3393:    * Unicode identifier extender =
3394:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]|
3395:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3396:    *
3397:    * @param ch character to test
3398:    * @return true if ch can follow the first letter in a Unicode identifier
3399:    * @see #isIdentifierIgnorable(char)
3400:    * @see #isJavaIdentifierPart(char)
3401:    * @see #isLetterOrDigit(char)
3402:    * @see #isUnicodeIdentifierStart(char)
3403:    * @since 1.1
3404:    */
3405:   public static boolean isUnicodeIdentifierPart(char ch)
3406:   {
3407:     return isUnicodeIdentifierPart((int)ch);
3408:   }
3409:   
3410:   /**
3411:    * Determines if a character can follow the first letter in
3412:    * a Unicode identifier. This includes letters, connecting punctuation,
3413:    * digits, numeric letters, combining marks, non-spacing marks, and
3414:    * isIdentifierIgnorable.
3415:    * <br>
3416:    * Unicode identifier extender =
3417:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]|
3418:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3419:    *
3420:    * @param codePoint character to test
3421:    * @return true if ch can follow the first letter in a Unicode identifier
3422:    * @see #isIdentifierIgnorable(char)
3423:    * @see #isJavaIdentifierPart(char)
3424:    * @see #isLetterOrDigit(char)
3425:    * @see #isUnicodeIdentifierStart(char)
3426:    * @since 1.5
3427:    */
3428:   public static boolean isUnicodeIdentifierPart(int codePoint)
3429:   {
3430:     int category = getType(codePoint);
3431:     return ((1 << category)
3432:             & ((1 << UPPERCASE_LETTER)
3433:                | (1 << LOWERCASE_LETTER)
3434:                | (1 << TITLECASE_LETTER)
3435:                | (1 << MODIFIER_LETTER)
3436:                | (1 << OTHER_LETTER)
3437:                | (1 << NON_SPACING_MARK)
3438:                | (1 << COMBINING_SPACING_MARK)
3439:                | (1 << DECIMAL_DIGIT_NUMBER)
3440:                | (1 << LETTER_NUMBER)
3441:                | (1 << CONNECTOR_PUNCTUATION)
3442:                | (1 << FORMAT))) != 0
3443:       || (category == CONTROL && isIdentifierIgnorable(codePoint));
3444:   }
3445: 
3446:   /**
3447:    * Determines if a character is ignorable in a Unicode identifier. This
3448:    * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
3449:    * through <code>'\u0008'</code>, <code>'\u000E'</code> through
3450:    * <code>'\u001B'</code>, and <code>'\u007F'</code> through
3451:    * <code>'\u009F'</code>), and FORMAT characters.
3452:    * <br>
3453:    * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
3454:    *    |U+007F-U+009F
3455:    *
3456:    * @param ch character to test
3457:    * @return true if ch is ignorable in a Unicode or Java identifier
3458:    * @see #isJavaIdentifierPart(char)
3459:    * @see #isUnicodeIdentifierPart(char)
3460:    * @since 1.1
3461:    */
3462:   public static boolean isIdentifierIgnorable(char ch)
3463:   {
3464:     return isIdentifierIgnorable((int)ch);
3465:   }
3466: 
3467:   /**
3468:    * Determines if a character is ignorable in a Unicode identifier. This
3469:    * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
3470:    * through <code>'\u0008'</code>, <code>'\u000E'</code> through
3471:    * <code>'\u001B'</code>, and <code>'\u007F'</code> through
3472:    * <code>'\u009F'</code>), and FORMAT characters.
3473:    * <br>
3474:    * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
3475:    *    |U+007F-U+009F
3476:    *
3477:    * @param codePoint character to test
3478:    * @return true if ch is ignorable in a Unicode or Java identifier
3479:    * @see #isJavaIdentifierPart(char)
3480:    * @see #isUnicodeIdentifierPart(char)
3481:    * @since 1.5
3482:    */
3483:   public static boolean isIdentifierIgnorable(int codePoint)
3484:   {
3485:     if ((codePoint >= 0 && codePoint <= 0x0008)
3486:         || (codePoint >= 0x000E && codePoint <= 0x001B)
3487:         || (codePoint >= 0x007F && codePoint <= 0x009F)
3488:         || getType(codePoint) == FORMAT)
3489:       return true;
3490:     return false;
3491:   }
3492: 
3493:   /**
3494:    * Converts a Unicode character into its lowercase equivalent mapping.
3495:    * If a mapping does not exist, then the character passed is returned.
3496:    * Note that isLowerCase(toLowerCase(ch)) does not always return true.
3497:    *
3498:    * @param ch character to convert to lowercase
3499:    * @return lowercase mapping of ch, or ch if lowercase mapping does
3500:    *         not exist
3501:    * @see #isLowerCase(char)
3502:    * @see #isUpperCase(char)
3503:    * @see #toTitleCase(char)
3504:    * @see #toUpperCase(char)
3505:    */
3506:   public static char toLowerCase(char ch)
3507:   {
3508:     return (char) (lower[0][readCodePoint((int)ch) >>> 7] + ch);
3509:   }
3510:   
3511:   /**
3512:    * Converts a Unicode character into its lowercase equivalent mapping.
3513:    * If a mapping does not exist, then the character passed is returned.
3514:    * Note that isLowerCase(toLowerCase(ch)) does not always return true.
3515:    *
3516:    * @param codePoint character to convert to lowercase
3517:    * @return lowercase mapping of ch, or ch if lowercase mapping does
3518:    *         not exist
3519:    * @see #isLowerCase(char)
3520:    * @see #isUpperCase(char)
3521:    * @see #toTitleCase(char)
3522:    * @see #toUpperCase(char)
3523:    * 
3524:    * @since 1.5
3525:    */
3526:   public static int toLowerCase(int codePoint)
3527:   {
3528:     // If the code point is unassigned or in one of the private use areas
3529:     // then we delegate the call to the appropriate private static inner class.
3530:     int plane = codePoint >>> 16;
3531:     if (plane > 2 && plane < 14)
3532:       return UnassignedCharacters.toLowerCase(codePoint);
3533:     if (plane > 14)
3534:       return PrivateUseCharacters.toLowerCase(codePoint);
3535:     
3536:     // The short value stored in lower[plane] is the signed difference between
3537:     // codePoint and its lowercase conversion.
3538:     return ((short)lower[plane][readCodePoint(codePoint) >>> 7]) + codePoint;
3539:   }
3540: 
3541:   /**
3542:    * Converts a Unicode character into its uppercase equivalent mapping.
3543:    * If a mapping does not exist, then the character passed is returned.
3544:    * Note that isUpperCase(toUpperCase(ch)) does not always return true.
3545:    *
3546:    * @param ch character to convert to uppercase
3547:    * @return uppercase mapping of ch, or ch if uppercase mapping does
3548:    *         not exist
3549:    * @see #isLowerCase(char)
3550:    * @see #isUpperCase(char)
3551:    * @see #toLowerCase(char)
3552:    * @see #toTitleCase(char)
3553:    */
3554:   public static char toUpperCase(char ch)
3555:   {
3556:     return (char) (upper[0][readCodePoint((int)ch) >>> 7] + ch);
3557:   }
3558:   
3559:   /**
3560:    * Converts a Unicode character into its uppercase equivalent mapping.
3561:    * If a mapping does not exist, then the character passed is returned.
3562:    * Note that isUpperCase(toUpperCase(ch)) does not always return true.
3563:    *
3564:    * @param codePoint character to convert to uppercase
3565:    * @return uppercase mapping of ch, or ch if uppercase mapping does
3566:    *         not exist
3567:    * @see #isLowerCase(char)
3568:    * @see #isUpperCase(char)
3569:    * @see #toLowerCase(char)
3570:    * @see #toTitleCase(char)
3571:    * 
3572:    * @since 1.5
3573:    */
3574:   public static int toUpperCase(int codePoint)
3575:   {
3576:     // If the code point is unassigned or in one of the private use areas
3577:     // then we delegate the call to the appropriate private static inner class.
3578:     int plane = codePoint >>> 16;
3579:     if (plane > 2 && plane < 14)
3580:       return UnassignedCharacters.toUpperCase(codePoint);
3581:     if (plane > 14)
3582:       return PrivateUseCharacters.toUpperCase(codePoint);
3583:         
3584:     // The short value stored in upper[plane] is the signed difference between
3585:     // codePoint and its uppercase conversion.
3586:     return ((short)upper[plane][readCodePoint(codePoint) >>> 7]) + codePoint;
3587:   }
3588: 
3589:   /**
3590:    * Converts a Unicode character into its titlecase equivalent mapping.
3591:    * If a mapping does not exist, then the character passed is returned.
3592:    * Note that isTitleCase(toTitleCase(ch)) does not always return true.
3593:    *
3594:    * @param ch character to convert to titlecase
3595:    * @return titlecase mapping of ch, or ch if titlecase mapping does
3596:    *         not exist
3597:    * @see #isTitleCase(char)
3598:    * @see #toLowerCase(char)
3599:    * @see #toUpperCase(char)
3600:    */
3601:   public static char toTitleCase(char ch)
3602:   {
3603:     // As title is short, it doesn't hurt to exhaustively iterate over it.
3604:     for (int i = title.length - 2; i >= 0; i -= 2)
3605:       if (title[i] == ch)
3606:         return title[i + 1];
3607:     return toUpperCase(ch);
3608:   }
3609: 
3610:   /**
3611:    * Converts a Unicode character into its titlecase equivalent mapping.
3612:    * If a mapping does not exist, then the character passed is returned.
3613:    * Note that isTitleCase(toTitleCase(ch)) does not always return true.
3614:    *
3615:    * @param codePoint character to convert to titlecase
3616:    * @return titlecase mapping of ch, or ch if titlecase mapping does
3617:    *         not exist
3618:    * @see #isTitleCase(char)
3619:    * @see #toLowerCase(char)
3620:    * @see #toUpperCase(char)
3621:    * 
3622:    * @since 1.5
3623:    */
3624:   public static int toTitleCase(int codePoint)
3625:   {
3626:     // As of Unicode 4.0.0 no characters outside of plane 0 have
3627:     // titlecase mappings that are different from their uppercase
3628:     // mapping.
3629:     if (codePoint < 0x10000)
3630:       return (int) toTitleCase((char)codePoint);
3631:     return toUpperCase(codePoint);
3632:   }
3633: 
3634:   /**
3635:    * Converts a character into a digit of the specified radix. If the radix
3636:    * is less than MIN_RADIX or greater than MAX_RADIX, or if the result of
3637:    * getNumericValue(ch) is not less than the radix, or if ch is not a decimal
3638:    * digit or in the case insensitive set of 'a'-'z', the result is -1.
3639:    * <br>
3640:    * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
3641:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3642:    *
3643:    * @param ch character to convert into a digit
3644:    * @param radix radix in which ch is a digit
3645:    * @return digit which ch represents in radix, or -1 not a valid digit
3646:    * @see #MIN_RADIX
3647:    * @see #MAX_RADIX
3648:    * @see #forDigit(int, int)
3649:    * @see #isDigit(char)
3650:    * @see #getNumericValue(char)
3651:    */
3652:   public static int digit(char ch, int radix)
3653:   {
3654:     if (radix < MIN_RADIX || radix > MAX_RADIX)
3655:       return -1;
3656:     char attr = readCodePoint((int)ch);
3657:     if (((1 << (attr & TYPE_MASK))
3658:          & ((1 << UPPERCASE_LETTER)
3659:             | (1 << LOWERCASE_LETTER)
3660:             | (1 << DECIMAL_DIGIT_NUMBER))) != 0)
3661:       {
3662:         // Signedness doesn't matter; 0xffff vs. -1 are both rejected.
3663:         int digit = numValue[0][attr >> 7];
3664:         return (digit < radix) ? digit : -1;
3665:       }
3666:     return -1;
3667:   }
3668: 
3669:   /**
3670:    * Converts a character into a digit of the specified radix. If the radix
3671:    * is less than MIN_RADIX or greater than MAX_RADIX, or if the result of
3672:    * getNumericValue(ch) is not less than the radix, or if ch is not a decimal
3673:    * digit or in the case insensitive set of 'a'-'z', the result is -1.
3674:    * <br>
3675:    * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
3676:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3677:    *
3678:    * @param codePoint character to convert into a digit
3679:    * @param radix radix in which ch is a digit
3680:    * @return digit which ch represents in radix, or -1 not a valid digit
3681:    * @see #MIN_RADIX
3682:    * @see #MAX_RADIX
3683:    * @see #forDigit(int, int)
3684:    * @see #isDigit(char)
3685:    * @see #getNumericValue(char)
3686:    */
3687:   public static int digit(int codePoint, int radix)
3688:   {
3689:     if (radix < MIN_RADIX || radix > MAX_RADIX)
3690:       return -1;
3691:     
3692:     // If the code point is unassigned or in one of the private use areas
3693:     // then we delegate the call to the appropriate private static inner class.
3694:     int plane = codePoint >>> 16;
3695:     if (plane > 2 && plane < 14)
3696:       return UnassignedCharacters.digit(codePoint, radix);
3697:     if (plane > 14)
3698:       return PrivateUseCharacters.digit(codePoint, radix);
3699:     char attr = readCodePoint(codePoint);
3700:     if (((1 << (attr & TYPE_MASK))
3701:          & ((1 << UPPERCASE_LETTER)
3702:             | (1 << LOWERCASE_LETTER)
3703:             | (1 << DECIMAL_DIGIT_NUMBER))) != 0)
3704:       {
3705:         // Signedness doesn't matter; 0xffff vs. -1 are both rejected.
3706:         int digit = numValue[plane][attr >> 7];
3707:         
3708:         // If digit is less than or equal to -3 then the numerical value was 
3709:         // too large to fit into numValue and is stored in CharData.LARGENUMS.
3710:         if (digit <= -3)
3711:           digit = CharData.LARGENUMS[-digit - 3];
3712:         return (digit < radix) ? digit : -1;
3713:       }
3714:     return -1;
3715:   }
3716:   
3717:   /**
3718:    * Returns the Unicode numeric value property of a character. For example,
3719:    * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
3720:    *
3721:    * <p>This method also returns values for the letters A through Z, (not
3722:    * specified by Unicode), in these ranges: <code>'\u0041'</code>
3723:    * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
3724:    * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
3725:    * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
3726:    * <code>'\uFF5A'</code> (full width variants).
3727:    *
3728:    * <p>If the character lacks a numeric value property, -1 is returned.
3729:    * If the character has a numeric value property which is not representable
3730:    * as a nonnegative integer, such as a fraction, -2 is returned.
3731:    *
3732:    * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
3733:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3734:    *
3735:    * @param ch character from which the numeric value property will
3736:    *        be retrieved
3737:    * @return the numeric value property of ch, or -1 if it does not exist, or
3738:    *         -2 if it is not representable as a nonnegative integer
3739:    * @see #forDigit(int, int)
3740:    * @see #digit(char, int)
3741:    * @see #isDigit(char)
3742:    * @since 1.1
3743:    */
3744:   public static int getNumericValue(char ch)
3745:   {
3746:     // Treat numValue as signed.
3747:     return (short) numValue[0][readCodePoint((int)ch) >> 7];
3748:   }
3749:   
3750:   /**
3751:    * Returns the Unicode numeric value property of a character. For example,
3752:    * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
3753:    *
3754:    * <p>This method also returns values for the letters A through Z, (not
3755:    * specified by Unicode), in these ranges: <code>'\u0041'</code>
3756:    * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
3757:    * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
3758:    * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
3759:    * <code>'\uFF5A'</code> (full width variants).
3760:    *
3761:    * <p>If the character lacks a numeric value property, -1 is returned.
3762:    * If the character has a numeric value property which is not representable
3763:    * as a nonnegative integer, such as a fraction, -2 is returned.
3764:    *
3765:    * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
3766:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3767:    *
3768:    * @param codePoint character from which the numeric value property will
3769:    *        be retrieved
3770:    * @return the numeric value property of ch, or -1 if it does not exist, or
3771:    *         -2 if it is not representable as a nonnegative integer
3772:    * @see #forDigit(int, int)
3773:    * @see #digit(char, int)
3774:    * @see #isDigit(char)
3775:    * @since 1.5
3776:    */
3777:   public static int getNumericValue(int codePoint)
3778:   {
3779:     // If the code point is unassigned or in one of the private use areas
3780:     // then we delegate the call to the appropriate private static inner class.
3781:     int plane = codePoint >>> 16;
3782:     if (plane > 2 && plane < 14)
3783:       return UnassignedCharacters.getNumericValue(codePoint);
3784:     if (plane > 14)
3785:       return PrivateUseCharacters.getNumericValue(codePoint);
3786:     
3787:     // If the value N found in numValue[plane] is less than or equal to -3
3788:     // then the numeric value was too big to fit into 16 bits and is 
3789:     // stored in CharData.LARGENUMS at offset (-N - 3).
3790:     short num = (short)numValue[plane][readCodePoint(codePoint) >> 7];
3791:     if (num <= -3)
3792:       return CharData.LARGENUMS[-num - 3];
3793:     return num;
3794:   }
3795: 
3796:   /**
3797:    * Determines if a character is an ISO-LATIN-1 space. This is only the five
3798:    * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
3799:    * <code>'\r'</code>, and <code>' '</code>.
3800:    * <br>
3801:    * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
3802:    *
3803:    * @param ch character to test
3804:    * @return true if ch is a space, else false
3805:    * @deprecated Replaced by {@link #isWhitespace(char)}
3806:    * @see #isSpaceChar(char)
3807:    * @see #isWhitespace(char)
3808:    */
3809:   public static boolean isSpace(char ch)
3810:   {
3811:     // Performing the subtraction up front alleviates need to compare longs.
3812:     return ch-- <= ' ' && ((1 << ch)
3813:                            & ((1 << (' ' - 1))
3814:                               | (1 << ('\t' - 1))
3815:                               | (1 << ('\n' - 1))
3816:                               | (1 << ('\r' - 1))
3817:                               | (1 << ('\f' - 1)))) != 0;
3818:   }
3819: 
3820:   /**
3821:    * Determines if a character is a Unicode space character. This includes
3822:    * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
3823:    * <br>
3824:    * Unicode space = [Zs]|[Zp]|[Zl]
3825:    *
3826:    * @param ch character to test
3827:    * @return true if ch is a Unicode space, else false
3828:    * @see #isWhitespace(char)
3829:    * @since 1.1
3830:    */
3831:   public static boolean isSpaceChar(char ch)
3832:   {
3833:     return isSpaceChar((int)ch);
3834:   }
3835:   
3836:   /**
3837:    * Determines if a character is a Unicode space character. This includes
3838:    * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
3839:    * <br>
3840:    * Unicode space = [Zs]|[Zp]|[Zl]
3841:    *
3842:    * @param codePoint character to test
3843:    * @return true if ch is a Unicode space, else false
3844:    * @see #isWhitespace(char)
3845:    * @since 1.5
3846:    */
3847:   public static boolean isSpaceChar(int codePoint)
3848:   {
3849:     return ((1 << getType(codePoint))
3850:             & ((1 << SPACE_SEPARATOR)
3851:                | (1 << LINE_SEPARATOR)
3852:                | (1 << PARAGRAPH_SEPARATOR))) != 0;
3853:   }
3854: 
3855:   /**
3856:    * Determines if a character is Java whitespace. This includes Unicode
3857:    * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
3858:    * PARAGRAPH_SEPARATOR) except the non-breaking spaces
3859:    * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
3860:    * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
3861:    * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
3862:    * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
3863:    * and <code>'\u001F'</code>.
3864:    * <br>
3865:    * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
3866:    *
3867:    * @param ch character to test
3868:    * @return true if ch is Java whitespace, else false
3869:    * @see #isSpaceChar(char)
3870:    * @since 1.1
3871:    */
3872:   public static boolean isWhitespace(char ch)
3873:   {
3874:     return isWhitespace((int) ch);
3875:   }
3876:   
3877:   /**
3878:    * Determines if a character is Java whitespace. This includes Unicode
3879:    * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
3880:    * PARAGRAPH_SEPARATOR) except the non-breaking spaces
3881:    * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
3882:    * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
3883:    * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
3884:    * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
3885:    * and <code>'\u001F'</code>.
3886:    * <br>
3887:    * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
3888:    *
3889:    * @param codePoint character to test
3890:    * @return true if ch is Java whitespace, else false
3891:    * @see #isSpaceChar(char)
3892:    * @since 1.5
3893:    */
3894:   public static boolean isWhitespace(int codePoint)
3895:   {
3896:     int plane = codePoint >>> 16;
3897:     if (plane > 2 && plane < 14)
3898:       return UnassignedCharacters.isWhiteSpace(codePoint);
3899:     if (plane > 14)
3900:       return PrivateUseCharacters.isWhiteSpace(codePoint);
3901:     
3902:     int attr = readCodePoint(codePoint);
3903:     return ((((1 << (attr & TYPE_MASK))
3904:               & ((1 << SPACE_SEPARATOR)
3905:                  | (1 << LINE_SEPARATOR)
3906:                  | (1 << PARAGRAPH_SEPARATOR))) != 0)
3907:             && (attr & NO_BREAK_MASK) == 0)
3908:       || (codePoint <= '\u001F' && ((1 << codePoint)
3909:                              & ((1 << '\t')
3910:                                 | (1 << '\n')
3911:                                 | (1 << '\u000B')
3912:                                 | (1 << '\u000C')
3913:                                 | (1 << '\r')
3914:                                 | (1 << '\u001C')
3915:                                 | (1 << '\u001D')
3916:                                 | (1 << '\u001E')
3917:                                 | (1 << '\u001F'))) != 0);
3918:   }
3919: 
3920:   /**
3921:    * Determines if a character has the ISO Control property.
3922:    * <br>
3923:    * ISO Control = [Cc]
3924:    *
3925:    * @param ch character to test
3926:    * @return true if ch is an ISO Control character, else false
3927:    * @see #isSpaceChar(char)
3928:    * @see #isWhitespace(char)
3929:    * @since 1.1
3930:    */
3931:   public static boolean isISOControl(char ch)
3932:   {
3933:     return isISOControl((int)ch);
3934:   }
3935:   
3936:   /**
3937:    * Determines if the character is an ISO Control character.  This is true
3938:    * if the code point is in the range [0, 0x001F] or if it is in the range
3939:    * [0x007F, 0x009F].
3940:    * @param codePoint the character to check
3941:    * @return true if the character is in one of the above ranges
3942:    * 
3943:    * @since 1.5
3944:    */
3945:   public static boolean isISOControl(int codePoint)
3946:   {
3947:     if ((codePoint >= 0 && codePoint <= 0x001F)
3948:         || (codePoint >= 0x007F && codePoint <= 0x009F))
3949:       return true;
3950:     return false;      
3951:   }
3952: 
3953:   /**
3954:    * Returns the Unicode general category property of a character.
3955:    *
3956:    * @param ch character from which the general category property will
3957:    *        be retrieved
3958:    * @return the character category property of ch as an integer
3959:    * @see #UNASSIGNED
3960:    * @see #UPPERCASE_LETTER
3961:    * @see #LOWERCASE_LETTER
3962:    * @see #TITLECASE_LETTER
3963:    * @see #MODIFIER_LETTER
3964:    * @see #OTHER_LETTER
3965:    * @see #NON_SPACING_MARK
3966:    * @see #ENCLOSING_MARK
3967:    * @see #COMBINING_SPACING_MARK
3968:    * @see #DECIMAL_DIGIT_NUMBER
3969:    * @see #LETTER_NUMBER
3970:    * @see #OTHER_NUMBER
3971:    * @see #SPACE_SEPARATOR
3972:    * @see #LINE_SEPARATOR
3973:    * @see #PARAGRAPH_SEPARATOR
3974:    * @see #CONTROL
3975:    * @see #FORMAT
3976:    * @see #PRIVATE_USE
3977:    * @see #SURROGATE
3978:    * @see #DASH_PUNCTUATION
3979:    * @see #START_PUNCTUATION
3980:    * @see #END_PUNCTUATION
3981:    * @see #CONNECTOR_PUNCTUATION
3982:    * @see #OTHER_PUNCTUATION
3983:    * @see #MATH_SYMBOL
3984:    * @see #CURRENCY_SYMBOL
3985:    * @see #MODIFIER_SYMBOL
3986:    * @see #INITIAL_QUOTE_PUNCTUATION
3987:    * @see #FINAL_QUOTE_PUNCTUATION
3988:    * @since 1.1
3989:    */
3990:   public static int getType(char ch)
3991:   {
3992:     return getType((int)ch);
3993:   }
3994:   
3995:   /**
3996:    * Returns the Unicode general category property of a character.
3997:    *
3998:    * @param codePoint character from which the general category property will
3999:    *        be retrieved
4000:    * @return the character category property of ch as an integer
4001:    * @see #UNASSIGNED
4002:    * @see #UPPERCASE_LETTER
4003:    * @see #LOWERCASE_LETTER
4004:    * @see #TITLECASE_LETTER
4005:    * @see #MODIFIER_LETTER
4006:    * @see #OTHER_LETTER
4007:    * @see #NON_SPACING_MARK
4008:    * @see #ENCLOSING_MARK
4009:    * @see #COMBINING_SPACING_MARK
4010:    * @see #DECIMAL_DIGIT_NUMBER
4011:    * @see #LETTER_NUMBER
4012:    * @see #OTHER_NUMBER
4013:    * @see #SPACE_SEPARATOR
4014:    * @see #LINE_SEPARATOR
4015:    * @see #PARAGRAPH_SEPARATOR
4016:    * @see #CONTROL
4017:    * @see #FORMAT
4018:    * @see #PRIVATE_USE
4019:    * @see #SURROGATE
4020:    * @see #DASH_PUNCTUATION
4021:    * @see #START_PUNCTUATION
4022:    * @see #END_PUNCTUATION
4023:    * @see #CONNECTOR_PUNCTUATION
4024:    * @see #OTHER_PUNCTUATION
4025:    * @see #MATH_SYMBOL
4026:    * @see #CURRENCY_SYMBOL
4027:    * @see #MODIFIER_SYMBOL
4028:    * @see #INITIAL_QUOTE_PUNCTUATION
4029:    * @see #FINAL_QUOTE_PUNCTUATION
4030:    * 
4031:    * @since 1.5
4032:    */
4033:   public static int getType(int codePoint)
4034:   {
4035:     // If the codePoint is unassigned or in one of the private use areas
4036:     // then we delegate the call to the appropriate private static inner class.
4037:     int plane = codePoint >>> 16;
4038:     if (plane > 2 && plane < 14)
4039:       return UnassignedCharacters.getType(codePoint);
4040:     if (plane > 14)
4041:       return PrivateUseCharacters.getType(codePoint);
4042:     
4043:     return readCodePoint(codePoint) & TYPE_MASK;
4044:   }
4045: 
4046:   /**
4047:    * Converts a digit into a character which represents that digit
4048:    * in a specified radix. If the radix is outside MIN_RADIX..MAX_RADIX, or
4049:    * the digit is negative or not less than the radix, <code>'\0'</code>
4050:    * is returned.  Otherwise the return value is in '0'-'9' and 'a'-'z'.
4051:    * <br>
4052:    * return value boundary = U+0030-U+0039|U+0061-U+007A
4053:    *
4054:    * @param digit digit to be converted into a character
4055:    * @param radix radix of digit
4056:    * @return character representing digit in radix, or '\0'
4057:    * @see #MIN_RADIX
4058:    * @see #MAX_RADIX
4059:    * @see #digit(char, int)
4060:    */
4061:   public static char forDigit(int digit, int radix)
4062:   {
4063:     if (radix < MIN_RADIX || radix > MAX_RADIX
4064:         || digit < 0 || digit >= radix)
4065:       return '\0';
4066:     return Number.digits[digit];
4067:   }
4068: 
4069:   /**
4070:    * Returns the Unicode directionality property of the character. This
4071:    * is used in the visual ordering of text.
4072:    *
4073:    * @param ch the character to look up
4074:    * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
4075:    * @see #DIRECTIONALITY_UNDEFINED
4076:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT
4077:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT
4078:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4079:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER
4080:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4081:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4082:    * @see #DIRECTIONALITY_ARABIC_NUMBER
4083:    * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4084:    * @see #DIRECTIONALITY_NONSPACING_MARK
4085:    * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
4086:    * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
4087:    * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
4088:    * @see #DIRECTIONALITY_WHITESPACE
4089:    * @see #DIRECTIONALITY_OTHER_NEUTRALS
4090:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4091:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4092:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4093:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4094:    * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4095:    * @since 1.4
4096:    */
4097:   public static byte getDirectionality(char ch)
4098:   {
4099:     // The result will correctly be signed.
4100:     return getDirectionality((int)ch);
4101:   }
4102: 
4103:   
4104:   /**
4105:    * Returns the Unicode directionality property of the character. This
4106:    * is used in the visual ordering of text.
4107:    *
4108:    * @param codePoint the character to look up
4109:    * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
4110:    * @see #DIRECTIONALITY_UNDEFINED
4111:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT
4112:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT
4113:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4114:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER
4115:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4116:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4117:    * @see #DIRECTIONALITY_ARABIC_NUMBER
4118:    * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4119:    * @see #DIRECTIONALITY_NONSPACING_MARK
4120:    * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
4121:    * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
4122:    * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
4123:    * @see #DIRECTIONALITY_WHITESPACE
4124:    * @see #DIRECTIONALITY_OTHER_NEUTRALS
4125:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4126:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4127:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4128:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4129:    * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4130:    * @since 1.5
4131:    */
4132:   public static byte getDirectionality(int codePoint)
4133:   {
4134:     // If the code point is unassigned or in one of the private use areas
4135:     // then we delegate the call to the appropriate private static inner class.
4136:     int plane = codePoint >>> 16;
4137:     if (plane > 2 && plane < 14)
4138:       return UnassignedCharacters.getDirectionality(codePoint);
4139:     if (plane > 14)
4140:       return PrivateUseCharacters.getDirectionality(codePoint);
4141:     
4142:     // The result will correctly be signed.
4143:     return (byte) (direction[plane][readCodePoint(codePoint) >> 7] >> 2);
4144:   }
4145:   
4146:   /**
4147:    * Determines whether the character is mirrored according to Unicode. For
4148:    * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
4149:    * left-to-right text, but ')' in right-to-left text.
4150:    *
4151:    * @param ch the character to look up
4152:    * @return true if the character is mirrored
4153:    * @since 1.4
4154:    */
4155:   public static boolean isMirrored(char ch)
4156:   {
4157:     return (readCodePoint((int)ch) & MIRROR_MASK) != 0;
4158:   }
4159:   
4160:   /**
4161:    * Determines whether the character is mirrored according to Unicode. For
4162:    * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
4163:    * left-to-right text, but ')' in right-to-left text.
4164:    *
4165:    * @param codePoint the character to look up
4166:    * @return true if the character is mirrored
4167:    * @since 1.5
4168:    */
4169:   public static boolean isMirrored(int codePoint)
4170:   {
4171:     // If the code point is unassigned or part of one of the private use areas
4172:     // then we delegate the call to the appropriate private static inner class.
4173:     int plane = codePoint >>> 16;
4174:     if (plane > 2 && plane < 14)
4175:       return UnassignedCharacters.isMirrored(codePoint);
4176:     if (plane > 14)
4177:       return PrivateUseCharacters.isMirrored(codePoint);
4178:     
4179:     return (readCodePoint(codePoint) & MIRROR_MASK) != 0;
4180:   }
4181: 
4182:   /**
4183:    * Compares another Character to this Character, numerically.
4184:    *
4185:    * @param anotherCharacter Character to compare with this Character
4186:    * @return a negative integer if this Character is less than
4187:    *         anotherCharacter, zero if this Character is equal, and
4188:    *         a positive integer if this Character is greater
4189:    * @throws NullPointerException if anotherCharacter is null
4190:    * @since 1.2
4191:    */
4192:   public int compareTo(Character anotherCharacter)
4193:   {
4194:     return value - anotherCharacter.value;
4195:   }
4196: 
4197:   /**
4198:    * Returns a <code>Character</code> object wrapping the value.
4199:    * In contrast to the <code>Character</code> constructor, this method
4200:    * will cache some values.  It is used by boxing conversion.
4201:    *
4202:    * @param val the value to wrap
4203:    * @return the <code>Character</code>
4204:    *
4205:    * @since 1.5
4206:    */
4207:   public static Character valueOf(char val)
4208:   {
4209:     if (val > MAX_CACHE)
4210:       return new Character(val);
4211:     synchronized (charCache)
4212:       {
4213:     if (charCache[val - MIN_VALUE] == null)
4214:       charCache[val - MIN_VALUE] = new Character(val);
4215:     return charCache[val - MIN_VALUE];
4216:       }
4217:   }
4218: 
4219:   /**
4220:    * Reverse the bytes in val.
4221:    * @since 1.5
4222:    */
4223:   public static char reverseBytes(char val)
4224:   {
4225:     return (char) (((val >> 8) & 0xff) | ((val << 8) & 0xff00));
4226:   }
4227: 
4228:   /**
4229:    * Converts a unicode code point to a UTF-16 representation of that
4230:    * code point.
4231:    * 
4232:    * @param codePoint the unicode code point
4233:    *
4234:    * @return the UTF-16 representation of that code point
4235:    *
4236:    * @throws IllegalArgumentException if the code point is not a valid
4237:    *         unicode code point
4238:    *
4239:    * @since 1.5
4240:    */
4241:   public static char[] toChars(int codePoint)
4242:   {
4243:     if (!isValidCodePoint(codePoint))
4244:       throw new IllegalArgumentException("Illegal Unicode code point : "
4245:                                          + codePoint);
4246:     char[] result = new char[charCount(codePoint)];
4247:     int ignore = toChars(codePoint, result, 0);
4248:     return result;
4249:   }
4250: 
4251:   /**
4252:    * Converts a unicode code point to its UTF-16 representation.
4253:    *
4254:    * @param codePoint the unicode code point
4255:    * @param dst the target char array
4256:    * @param dstIndex the start index for the target
4257:    *
4258:    * @return number of characters written to <code>dst</code>
4259:    *
4260:    * @throws IllegalArgumentException if <code>codePoint</code> is not a
4261:    *         valid unicode code point
4262:    * @throws NullPointerException if <code>dst</code> is <code>null</code>
4263:    * @throws IndexOutOfBoundsException if <code>dstIndex</code> is not valid
4264:    *         in <code>dst</code> or if the UTF-16 representation does not
4265:    *         fit into <code>dst</code>
4266:    *
4267:    * @since 1.5
4268:    */
4269:   public static int toChars(int codePoint, char[] dst, int dstIndex)
4270:   {
4271:     if (!isValidCodePoint(codePoint))
4272:       {
4273:         throw new IllegalArgumentException("not a valid code point: "
4274:                                            + codePoint);
4275:       }
4276: 
4277:     int result;
4278:     if (isSupplementaryCodePoint(codePoint))
4279:       {
4280:         // Write second char first to cause IndexOutOfBoundsException
4281:         // immediately.
4282:         final int cp2 = codePoint - 0x10000;
4283:         dst[dstIndex + 1] = (char) ((cp2 % 0x400) + (int) MIN_LOW_SURROGATE);
4284:         dst[dstIndex] = (char) ((cp2 / 0x400) + (int) MIN_HIGH_SURROGATE);
4285:         result = 2;
4286:       }
4287:     else
4288:       {
4289:         dst[dstIndex] = (char) codePoint;
4290:         result = 1; 
4291:       }
4292:     return result;
4293:   }
4294: 
4295:   /**
4296:    * Return number of 16-bit characters required to represent the given
4297:    * code point.
4298:    *
4299:    * @param codePoint a unicode code point
4300:    *
4301:    * @return 2 if codePoint >= 0x10000, 1 otherwise.
4302:    *
4303:    * @since 1.5
4304:    */
4305:   public static int charCount(int codePoint)
4306:   {
4307:     return 
4308:       (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) 
4309:       ? 2 
4310:       : 1;
4311:   }
4312: 
4313:   /**
4314:    * Determines whether the specified code point is
4315:    * in the range 0x10000 .. 0x10FFFF, i.e. the character is within the Unicode
4316:    * supplementary character range.
4317:    *
4318:    * @param codePoint a Unicode code point
4319:    *
4320:    * @return <code>true</code> if code point is in supplementary range
4321:    *
4322:    * @since 1.5
4323:    */
4324:   public static boolean isSupplementaryCodePoint(int codePoint)
4325:   {
4326:     return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4327:       && codePoint <= MAX_CODE_POINT;
4328:   }
4329: 
4330:   /**
4331:    * Determines whether the specified code point is
4332:    * in the range 0x0000 .. 0x10FFFF, i.e. it is a valid Unicode code point.
4333:    *
4334:    * @param codePoint a Unicode code point
4335:    *
4336:    * @return <code>true</code> if code point is valid
4337:    *
4338:    * @since 1.5
4339:    */
4340:   public static boolean isValidCodePoint(int codePoint)
4341:   {
4342:     return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
4343:   }
4344: 
4345:   /**
4346:    * Return true if the given character is a high surrogate.
4347:    * @param ch the character
4348:    * @return true if the character is a high surrogate character
4349:    *
4350:    * @since 1.5
4351:    */
4352:   public static boolean isHighSurrogate(char ch)
4353:   {
4354:     return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
4355:   }
4356: 
4357:   /**
4358:    * Return true if the given character is a low surrogate.
4359:    * @param ch the character
4360:    * @return true if the character is a low surrogate character
4361:    *
4362:    * @since 1.5
4363:    */
4364:   public static boolean isLowSurrogate(char ch)
4365:   {
4366:     return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
4367:   }
4368: 
4369:   /**
4370:    * Return true if the given characters compose a surrogate pair.
4371:    * This is true if the first character is a high surrogate and the
4372:    * second character is a low surrogate.
4373:    * @param ch1 the first character
4374:    * @param ch2 the second character
4375:    * @return true if the characters compose a surrogate pair
4376:    *
4377:    * @since 1.5
4378:    */
4379:   public static boolean isSurrogatePair(char ch1, char ch2)
4380:   {
4381:     return isHighSurrogate(ch1) && isLowSurrogate(ch2);
4382:   }
4383: 
4384:   /**
4385:    * Given a valid surrogate pair, this returns the corresponding
4386:    * code point.
4387:    * @param high the high character of the pair
4388:    * @param low the low character of the pair
4389:    * @return the corresponding code point
4390:    *
4391:    * @since 1.5
4392:    */
4393:   public static int toCodePoint(char high, char low)
4394:   {
4395:     return ((high - MIN_HIGH_SURROGATE) * 0x400) +
4396:       (low - MIN_LOW_SURROGATE) + 0x10000;
4397:   }
4398: 
4399:   /**
4400:    * Get the code point at the specified index in the CharSequence.
4401:    * This is like CharSequence#charAt(int), but if the character is
4402:    * the start of a surrogate pair, and there is a following
4403:    * character, and this character completes the pair, then the
4404:    * corresponding supplementary code point is returned.  Otherwise,
4405:    * the character at the index is returned.
4406:    *
4407:    * @param sequence the CharSequence
4408:    * @param index the index of the codepoint to get, starting at 0
4409:    * @return the codepoint at the specified index
4410:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4411:    * @since 1.5
4412:    */
4413:   public static int codePointAt(CharSequence sequence, int index)
4414:   {
4415:     int len = sequence.length();
4416:     if (index < 0 || index >= len)
4417:       throw new IndexOutOfBoundsException();
4418:     char high = sequence.charAt(index);
4419:     if (! isHighSurrogate(high) || ++index >= len)
4420:       return high;
4421:     char low = sequence.charAt(index);
4422:     if (! isLowSurrogate(low))
4423:       return high;
4424:     return toCodePoint(high, low);
4425:   }
4426: 
4427:   /**
4428:    * Get the code point at the specified index in the CharSequence.
4429:    * If the character is the start of a surrogate pair, and there is a
4430:    * following character, and this character completes the pair, then
4431:    * the corresponding supplementary code point is returned.
4432:    * Otherwise, the character at the index is returned.
4433:    *
4434:    * @param chars the character array in which to look
4435:    * @param index the index of the codepoint to get, starting at 0
4436:    * @return the codepoint at the specified index
4437:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4438:    * @since 1.5
4439:    */
4440:   public static int codePointAt(char[] chars, int index)
4441:   {
4442:     return codePointAt(chars, index, chars.length);
4443:   }
4444: 
4445:   /**
4446:    * Get the code point at the specified index in the CharSequence.
4447:    * If the character is the start of a surrogate pair, and there is a
4448:    * following character within the specified range, and this
4449:    * character completes the pair, then the corresponding
4450:    * supplementary code point is returned.  Otherwise, the character
4451:    * at the index is returned.
4452:    *
4453:    * @param chars the character array in which to look
4454:    * @param index the index of the codepoint to get, starting at 0
4455:    * @param limit the limit past which characters should not be examined
4456:    * @return the codepoint at the specified index
4457:    * @throws IndexOutOfBoundsException if index is negative or &gt;=
4458:    * limit, or if limit is negative or &gt;= the length of the array
4459:    * @since 1.5
4460:    */
4461:   public static int codePointAt(char[] chars, int index, int limit)
4462:   {
4463:     if (index < 0 || index >= limit || limit < 0 || limit > chars.length)
4464:       throw new IndexOutOfBoundsException();
4465:     char high = chars[index];
4466:     if (! isHighSurrogate(high) || ++index >= limit)
4467:       return high;
4468:     char low = chars[index];
4469:     if (! isLowSurrogate(low))
4470:       return high;
4471:     return toCodePoint(high, low);
4472:   }
4473: 
4474:   /**
4475:    * Get the code point before the specified index.  This is like
4476:    * #codePointAt(char[], int), but checks the characters at
4477:    * <code>index-1</code> and <code>index-2</code> to see if they form
4478:    * a supplementary code point.  If they do not, the character at
4479:    * <code>index-1</code> is returned.
4480:    *
4481:    * @param chars the character array
4482:    * @param index the index just past the codepoint to get, starting at 0
4483:    * @return the codepoint at the specified index
4484:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4485:    * @since 1.5
4486:    */
4487:   public static int codePointBefore(char[] chars, int index)
4488:   {
4489:     return codePointBefore(chars, index, 1);
4490:   }
4491: 
4492:   /**
4493:    * Get the code point before the specified index.  This is like
4494:    * #codePointAt(char[], int), but checks the characters at
4495:    * <code>index-1</code> and <code>index-2</code> to see if they form
4496:    * a supplementary code point.  If they do not, the character at
4497:    * <code>index-1</code> is returned.  The start parameter is used to
4498:    * limit the range of the array which may be examined.
4499:    *
4500:    * @param chars the character array
4501:    * @param index the index just past the codepoint to get, starting at 0
4502:    * @param start the index before which characters should not be examined
4503:    * @return the codepoint at the specified index
4504:    * @throws IndexOutOfBoundsException if index is &gt; start or &gt;
4505:    * the length of the array, or if limit is negative or &gt;= the
4506:    * length of the array
4507:    * @since 1.5
4508:    */
4509:   public static int codePointBefore(char[] chars, int index, int start)
4510:   {
4511:     if (index < start || index > chars.length
4512:     || start < 0 || start >= chars.length)
4513:       throw new IndexOutOfBoundsException();
4514:     --index;
4515:     char low = chars[index];
4516:     if (! isLowSurrogate(low) || --index < start)
4517:       return low;
4518:     char high = chars[index];
4519:     if (! isHighSurrogate(high))
4520:       return low;
4521:     return toCodePoint(high, low);
4522:   }
4523: 
4524:   /**
4525:    * Get the code point before the specified index.  This is like
4526:    * #codePointAt(CharSequence, int), but checks the characters at
4527:    * <code>index-1</code> and <code>index-2</code> to see if they form
4528:    * a supplementary code point.  If they do not, the character at
4529:    * <code>index-1</code> is returned.
4530:    *
4531:    * @param sequence the CharSequence
4532:    * @param index the index just past the codepoint to get, starting at 0
4533:    * @return the codepoint at the specified index
4534:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4535:    * @since 1.5
4536:    */
4537:   public static int codePointBefore(CharSequence sequence, int index)
4538:   {
4539:     int len = sequence.length();
4540:     if (index < 1 || index > len)
4541:       throw new IndexOutOfBoundsException();
4542:     --index;
4543:     char low = sequence.charAt(index);
4544:     if (! isLowSurrogate(low) || --index < 0)
4545:       return low;
4546:     char high = sequence.charAt(index);
4547:     if (! isHighSurrogate(high))
4548:       return low;
4549:     return toCodePoint(high, low);
4550:   }
4551: } // class Character
Overview Package Class Use Source Tree Index Deprecated About	GNU Classpath (0.95)
	GNU Classpath (0.95)	Frames \| No Frames