Source for java.lang.Character

   1: /* java.lang.Character -- Wrapper class for char, and Unicode subsets
   2:    Copyright (C) 1998, 1999, 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.lang;
  40: 
  41: import gnu.java.lang.CharData;
  42: 
  43: import java.io.Serializable;
  44: import java.text.Collator;
  45: import java.util.Locale;
  46: 
  47: /**
  48:  * Wrapper class for the primitive char data type.  In addition, this class
  49:  * allows one to retrieve property information and perform transformations
  50:  * on the defined characters in the Unicode Standard, Version 4.0.0.
  51:  * java.lang.Character is designed to be very dynamic, and as such, it
  52:  * retrieves information on the Unicode character set from a separate
  53:  * database, gnu.java.lang.CharData, which can be easily upgraded.
  54:  *
  55:  * <p>For predicates, boundaries are used to describe
  56:  * the set of characters for which the method will return true.
  57:  * This syntax uses fairly normal regular expression notation.
  58:  * See 5.13 of the Unicode Standard, Version 4.0, for the
  59:  * boundary specification.
  60:  *
  61:  * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
  62:  * for more information on the Unicode Standard.
  63:  *
  64:  * @author Tom Tromey (tromey@cygnus.com)
  65:  * @author Paul N. Fisher
  66:  * @author Jochen Hoenicke
  67:  * @author Eric Blake (ebb9@email.byu.edu)
  68:  * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
  69:  * @see CharData
  70:  * @since 1.0
  71:  * @status partly updated to 1.5; some things still missing
  72:  */
  73: public final class Character implements Serializable, Comparable<Character>
  74: {
  75:   /**
  76:    * A named subset of the Unicode character set, compared by identity.
  77:    *
  78:    * @author Paul N. Fisher
  79:    * @author Eric Blake (ebb9@email.byu.edu)
  80:    * @since 1.2
  81:    */
  82:   public static class Subset
  83:   {
  84:     /** Name supplied at construction time; never null. */
  85:     private final String subsetName;
  86: 
  87:     /**
  88:      * Creates a subset with the given name.
  89:      *
  90:      * @param name the name of the subset
  91:      * @throws NullPointerException if name is null
  92:      */
  93:     protected Subset(String name)
  94:     {
  95:       // Dereferencing name makes a null argument fail right here.
  96:       String checked = name.toString();
  97:       subsetName = checked;
  98:     }
  99: 
 100:     /**
 101:      * Tests for equality using <code>==</code> only.  The method is
 102:      * <code>final</code>, so subclasses cannot relax this rule.
 103:      *
 104:      * @param o the object to compare
 105:      * @return true if o is this
 106:      */
 107:     public final boolean equals(Object o)
 108:     {
 109:       return this == o;
 110:     }
 111: 
 112:     /**
 113:      * Pins the hash code to the one inherited from Object, keeping it
 114:      * consistent with the identity-based equals.
 115:      *
 116:      * @return the hash code for this object
 117:      */
 118:     public final int hashCode()
 119:     {
 120:       return super.hashCode();
 121:     }
 122: 
 123:     /**
 124:      * Returns the name this subset was created with.
 125:      *
 126:      * @return the name
 127:      */
 128:     public final String toString()
 129:     {
 130:       return subsetName;
 131:     }
 132:   } // class Subset
 133: 
 134:   /**
 135:    * A family of character subsets in the Unicode specification. A character
 136:    * is in at most one of these blocks.
 137:    *
 138:    * This inner class was generated automatically from
 139:    * <code>doc/unicode/Blocks-4.0.0.txt</code>, by some perl scripts.
 140:    * This Unicode definition file can be found on the
 141:    * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
 142:    * JDK 1.5 uses Unicode version 4.0.0.
 143:    *
 144:    * @author scripts/unicode-blocks.pl (written by Eric Blake)
 145:    * @since 1.2
 146:    */
 147:   public static final class UnicodeBlock extends Subset
 148:   {
 149:     /** The first code point of the block (inclusive). */
 150:     private final int start;
 151: 
 152:     /** The last code point of the block (inclusive). */
 153:     private final int end;
 154: 
 155:     /** The canonical name of the block according to the Unicode standard. */
 156:     private final String canonicalName;
 157: 
 158:     /** The three name styles that the <code>forName()</code> method accepts. */
 159:     private enum NameType { CANONICAL, NO_SPACES, CONSTANT; }
 160: 
 161:     /**
 162:      * Creates a block spanning a fixed, inclusive range of code points.
 163:      *
 164:      * @param start the first code point of the range
 165:      * @param end the last code point of the range
 166:      * @param name the block name, passed on to <code>Subset</code>
 167:      * @param canonicalName the name of the block as defined in the Unicode
 168:      *        standard
 169:      */
 170:     private UnicodeBlock(int start, int end, String name, String canonicalName)
 171:     {
 172:       // The superclass constructor has to run before any field is set.
 173:       super(name);
 174:       this.canonicalName = canonicalName;
 175:       this.end = end;
 176:       this.start = start;
 177:     }
 178: 
 179:     /**
 180:      * Looks up the Unicode character block containing the given character.
 181:      * <strong>Note</strong>: a <code>char</code> cannot hold a supplementary
 182:      * character; use <code>of(int)</code> when those must be supported.
 183:      *
 184:      * @param ch the character to look up
 185:      * @return the set it belongs to, or null if it is not in one
 186:      */
 187:     public static UnicodeBlock of(char ch)
 188:     {
 189:       int codePoint = ch; // widening conversion, never negative
 190:       return of(codePoint);
 191:     }
 192: 
 193:     /**
 194:      * Returns the Unicode character block which a code point belongs to.
 195:      *
 196:      * @param codePoint the code point to look up
 197:      * @return the block it belongs to, or null if it is not in one.
 198:      * @throws IllegalArgumentException if the specified code point is
 199:      *         negative or greater than <code>MAX_CODE_POINT</code>.
 200:      * @since 1.5
 201:      */
 202:     public static UnicodeBlock of(int codePoint)
 203:     {
 204:       if (codePoint < 0 || codePoint > MAX_CODE_POINT)
 205:         throw new IllegalArgumentException("The supplied integer value is " +
 206:                                            "not a valid codepoint: " + codePoint);
 207:       // Binary search; assumes sets is sorted by ascending code point.
 208:       int low = 0;
 209:       int hi = sets.length - 1;
 210:       while (low <= hi)
 211:         {
 212:           int mid = (low + hi) >>> 1;
 213:           UnicodeBlock b = sets[mid];
 214:           if (codePoint < b.start)
 215:             hi = mid - 1;
 216:           else if (codePoint > b.end)
 217:             low = mid + 1;
 218:           else
 219:             return b;
 220:         }
 221:       return null;
 222:     }
 223: 
 224:     /**
 225:      * <p>
 226:      * Returns the <code>UnicodeBlock</code> with the given name, as defined
 227:      * by the Unicode standard.  The version of Unicode in use is defined by
 228:      * the <code>Character</code> class, and the names are given in the
 229:      * <code>Blocks-&lt;version&gt;.txt</code> file corresponding to that
 230:      * version.  The name may be specified in one of three ways:
 231:      * </p>
 232:      * <ol>
 233:      * <li>The canonical, human-readable name used by the Unicode standard.
 234:      * This is the name with all spaces and hyphens retained.  For example,
 235:      * `Basic Latin' retrieves the block, UnicodeBlock.BASIC_LATIN.</li>
 236:      * <li>The canonical name with all spaces removed e.g. `BasicLatin'.</li>
 237:      * <li>The name used for the constants specified by this class, which
 238:      * is the canonical name with all spaces and hyphens replaced with
 239:      * underscores e.g. `BASIC_LATIN'</li>
 240:      * </ol>
 241:      * <p>
 242:      * The style is chosen from the name itself: a name containing a space
 243:      * is matched as a canonical name; otherwise a name containing an
 244:      * underscore is matched as a constant name; any other name is matched
 245:      * against the canonical names with their spaces removed.
 246:      * </p>
 247:      * <p>
 248:      * The names are compared case-insensitively using the case comparison
 249:      * associated with the U.S. English locale.  The method recognises the
 250:      * previous names used for blocks as well as the current ones.  At
 251:      * present, this simply means that the deprecated `SURROGATES_AREA'
 252:      * will be recognised by this method (the <code>of()</code> methods
 253:      * only return one of the three new surrogate blocks).
 254:      * </p>
 255:      *
 256:      * @param blockName the name of the block to look up.
 257:      * @return the specified block.
 258:      * @throws NullPointerException if the <code>blockName</code> is
 259:      *         <code>null</code>.
 260:      * @throws IllegalArgumentException if the name does not match any Unicode
 261:      *         block.
 262:      * @since 1.5
 263:      */
 264:     public static final UnicodeBlock forName(String blockName)
 265:     {
 266:       // Dereferencing blockName here yields the documented NPE for null.
 267:       NameType type;
 268:       if (blockName.indexOf(' ') != -1)
 269:         type = NameType.CANONICAL;
 270:       else if (blockName.indexOf('_') != -1)
 271:         type = NameType.CONSTANT;
 272:       else
 273:         type = NameType.NO_SPACES;
 274:       // PRIMARY strength makes the comparison ignore case differences.
 275:       Collator usCollator = Collator.getInstance(Locale.US);
 276:       usCollator.setStrength(Collator.PRIMARY);
 277:       /* Each case tries the deprecated block (not in sets) first. */
 278:       switch (type)
 279:         {
 280:         case CANONICAL:
 281:           if (usCollator.compare(blockName, "Surrogates Area") == 0)
 282:             return SURROGATES_AREA;
 283:           for (UnicodeBlock block : sets)
 284:             if (usCollator.compare(blockName, block.canonicalName) == 0)
 285:               return block;
 286:           break;
 287:         case NO_SPACES:
 288:           if (usCollator.compare(blockName, "SurrogatesArea") == 0)
 289:             return SURROGATES_AREA;
 290:           for (UnicodeBlock block : sets)
 291:             {
 292:               // Literal replace: no regex is compiled for every block.
 293:               String nsName = block.canonicalName.replace(" ", "");
 294:               if (usCollator.compare(blockName, nsName) == 0)
 295:                 return block;
 296:             }
 297:           break;
 298:         case CONSTANT:
 299:           if (usCollator.compare(blockName, "SURROGATES_AREA") == 0)
 300:             return SURROGATES_AREA;
 301:           for (UnicodeBlock block : sets)
 302:             if (usCollator.compare(blockName, block.toString()) == 0)
 303:               return block;
 304:           break;
 305:         }
 306:       // Nothing matched in the style the name was classified as.
 307:       throw new IllegalArgumentException("No Unicode block found for " +
 308:                                          blockName + ".");
 309:     }
 310: 
 311:     /**
 312:      * Basic Latin.
 313:      * 0x0000 - 0x007F.
 314:      */
 315:     public static final UnicodeBlock BASIC_LATIN
 316:       = new UnicodeBlock(0x0000, 0x007F,
 317:                          "BASIC_LATIN", 
 318:                          "Basic Latin");
 319: 
 320:     /**
 321:      * Latin-1 Supplement.
 322:      * 0x0080 - 0x00FF.
 323:      */
 324:     public static final UnicodeBlock LATIN_1_SUPPLEMENT
 325:       = new UnicodeBlock(0x0080, 0x00FF,
 326:                          "LATIN_1_SUPPLEMENT", 
 327:                          "Latin-1 Supplement");
 328: 
 329:     /**
 330:      * Latin Extended-A.
 331:      * 0x0100 - 0x017F.
 332:      */
 333:     public static final UnicodeBlock LATIN_EXTENDED_A
 334:       = new UnicodeBlock(0x0100, 0x017F,
 335:                          "LATIN_EXTENDED_A", 
 336:                          "Latin Extended-A");
 337: 
 338:     /**
 339:      * Latin Extended-B.
 340:      * 0x0180 - 0x024F.
 341:      */
 342:     public static final UnicodeBlock LATIN_EXTENDED_B
 343:       = new UnicodeBlock(0x0180, 0x024F,
 344:                          "LATIN_EXTENDED_B", 
 345:                          "Latin Extended-B");
 346: 
 347:     /**
 348:      * IPA Extensions.
 349:      * 0x0250 - 0x02AF.
 350:      */
 351:     public static final UnicodeBlock IPA_EXTENSIONS
 352:       = new UnicodeBlock(0x0250, 0x02AF,
 353:                          "IPA_EXTENSIONS", 
 354:                          "IPA Extensions");
 355: 
 356:     /**
 357:      * Spacing Modifier Letters.
 358:      * 0x02B0 - 0x02FF.
 359:      */
 360:     public static final UnicodeBlock SPACING_MODIFIER_LETTERS
 361:       = new UnicodeBlock(0x02B0, 0x02FF,
 362:                          "SPACING_MODIFIER_LETTERS", 
 363:                          "Spacing Modifier Letters");
 364: 
 365:     /**
 366:      * Combining Diacritical Marks.
 367:      * 0x0300 - 0x036F.
 368:      */
 369:     public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
 370:       = new UnicodeBlock(0x0300, 0x036F,
 371:                          "COMBINING_DIACRITICAL_MARKS", 
 372:                          "Combining Diacritical Marks");
 373: 
 374:     /**
 375:      * Greek.
 376:      * 0x0370 - 0x03FF.
 377:      */
 378:     public static final UnicodeBlock GREEK
 379:       = new UnicodeBlock(0x0370, 0x03FF,
 380:                          "GREEK", 
 381:                          "Greek");
 382: 
 383:     /**
 384:      * Cyrillic.
 385:      * 0x0400 - 0x04FF.
 386:      */
 387:     public static final UnicodeBlock CYRILLIC
 388:       = new UnicodeBlock(0x0400, 0x04FF,
 389:                          "CYRILLIC", 
 390:                          "Cyrillic");
 391: 
 392:     /**
 393:      * Cyrillic Supplementary.
 394:      * 0x0500 - 0x052F.
 395:      * @since 1.5
 396:      */
 397:     public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
 398:       = new UnicodeBlock(0x0500, 0x052F,
 399:                          "CYRILLIC_SUPPLEMENTARY", 
 400:                          "Cyrillic Supplementary");
 401: 
 402:     /**
 403:      * Armenian.
 404:      * 0x0530 - 0x058F.
 405:      */
 406:     public static final UnicodeBlock ARMENIAN
 407:       = new UnicodeBlock(0x0530, 0x058F,
 408:                          "ARMENIAN", 
 409:                          "Armenian");
 410: 
 411:     /**
 412:      * Hebrew.
 413:      * 0x0590 - 0x05FF.
 414:      */
 415:     public static final UnicodeBlock HEBREW
 416:       = new UnicodeBlock(0x0590, 0x05FF,
 417:                          "HEBREW", 
 418:                          "Hebrew");
 419: 
 420:     /**
 421:      * Arabic.
 422:      * 0x0600 - 0x06FF.
 423:      */
 424:     public static final UnicodeBlock ARABIC
 425:       = new UnicodeBlock(0x0600, 0x06FF,
 426:                          "ARABIC", 
 427:                          "Arabic");
 428: 
 429:     /**
 430:      * Syriac.
 431:      * 0x0700 - 0x074F.
 432:      * @since 1.4
 433:      */
 434:     public static final UnicodeBlock SYRIAC
 435:       = new UnicodeBlock(0x0700, 0x074F,
 436:                          "SYRIAC", 
 437:                          "Syriac");
 438: 
 439:     /**
 440:      * Thaana.
 441:      * 0x0780 - 0x07BF.
 442:      * @since 1.4
 443:      */
 444:     public static final UnicodeBlock THAANA
 445:       = new UnicodeBlock(0x0780, 0x07BF,
 446:                          "THAANA", 
 447:                          "Thaana");
 448: 
 449:     /**
 450:      * Devanagari.
 451:      * 0x0900 - 0x097F.
 452:      */
 453:     public static final UnicodeBlock DEVANAGARI
 454:       = new UnicodeBlock(0x0900, 0x097F,
 455:                          "DEVANAGARI", 
 456:                          "Devanagari");
 457: 
 458:     /**
 459:      * Bengali.
 460:      * 0x0980 - 0x09FF.
 461:      */
 462:     public static final UnicodeBlock BENGALI
 463:       = new UnicodeBlock(0x0980, 0x09FF,
 464:                          "BENGALI", 
 465:                          "Bengali");
 466: 
 467:     /**
 468:      * Gurmukhi.
 469:      * 0x0A00 - 0x0A7F.
 470:      */
 471:     public static final UnicodeBlock GURMUKHI
 472:       = new UnicodeBlock(0x0A00, 0x0A7F,
 473:                          "GURMUKHI", 
 474:                          "Gurmukhi");
 475: 
 476:     /**
 477:      * Gujarati.
 478:      * 0x0A80 - 0x0AFF.
 479:      */
 480:     public static final UnicodeBlock GUJARATI
 481:       = new UnicodeBlock(0x0A80, 0x0AFF,
 482:                          "GUJARATI", 
 483:                          "Gujarati");
 484: 
 485:     /**
 486:      * Oriya.
 487:      * 0x0B00 - 0x0B7F.
 488:      */
 489:     public static final UnicodeBlock ORIYA
 490:       = new UnicodeBlock(0x0B00, 0x0B7F,
 491:                          "ORIYA", 
 492:                          "Oriya");
 493: 
 494:     /**
 495:      * Tamil.
 496:      * 0x0B80 - 0x0BFF.
 497:      */
 498:     public static final UnicodeBlock TAMIL
 499:       = new UnicodeBlock(0x0B80, 0x0BFF,
 500:                          "TAMIL", 
 501:                          "Tamil");
 502: 
 503:     /**
 504:      * Telugu.
 505:      * 0x0C00 - 0x0C7F.
 506:      */
 507:     public static final UnicodeBlock TELUGU
 508:       = new UnicodeBlock(0x0C00, 0x0C7F,
 509:                          "TELUGU", 
 510:                          "Telugu");
 511: 
 512:     /**
 513:      * Kannada.
 514:      * 0x0C80 - 0x0CFF.
 515:      */
 516:     public static final UnicodeBlock KANNADA
 517:       = new UnicodeBlock(0x0C80, 0x0CFF,
 518:                          "KANNADA", 
 519:                          "Kannada");
 520: 
 521:     /**
 522:      * Malayalam.
 523:      * 0x0D00 - 0x0D7F.
 524:      */
 525:     public static final UnicodeBlock MALAYALAM
 526:       = new UnicodeBlock(0x0D00, 0x0D7F,
 527:                          "MALAYALAM", 
 528:                          "Malayalam");
 529: 
 530:     /**
 531:      * Sinhala.
 532:      * 0x0D80 - 0x0DFF.
 533:      * @since 1.4
 534:      */
 535:     public static final UnicodeBlock SINHALA
 536:       = new UnicodeBlock(0x0D80, 0x0DFF,
 537:                          "SINHALA", 
 538:                          "Sinhala");
 539: 
 540:     /**
 541:      * Thai.
 542:      * 0x0E00 - 0x0E7F.
 543:      */
 544:     public static final UnicodeBlock THAI
 545:       = new UnicodeBlock(0x0E00, 0x0E7F,
 546:                          "THAI", 
 547:                          "Thai");
 548: 
 549:     /**
 550:      * Lao.
 551:      * 0x0E80 - 0x0EFF.
 552:      */
 553:     public static final UnicodeBlock LAO
 554:       = new UnicodeBlock(0x0E80, 0x0EFF,
 555:                          "LAO", 
 556:                          "Lao");
 557: 
 558:     /**
 559:      * Tibetan.
 560:      * 0x0F00 - 0x0FFF.
 561:      */
 562:     public static final UnicodeBlock TIBETAN
 563:       = new UnicodeBlock(0x0F00, 0x0FFF,
 564:                          "TIBETAN", 
 565:                          "Tibetan");
 566: 
 567:     /**
 568:      * Myanmar.
 569:      * 0x1000 - 0x109F.
 570:      * @since 1.4
 571:      */
 572:     public static final UnicodeBlock MYANMAR
 573:       = new UnicodeBlock(0x1000, 0x109F,
 574:                          "MYANMAR", 
 575:                          "Myanmar");
 576: 
 577:     /**
 578:      * Georgian.
 579:      * 0x10A0 - 0x10FF.
 580:      */
 581:     public static final UnicodeBlock GEORGIAN
 582:       = new UnicodeBlock(0x10A0, 0x10FF,
 583:                          "GEORGIAN", 
 584:                          "Georgian");
 585: 
 586:     /**
 587:      * Hangul Jamo.
 588:      * 0x1100 - 0x11FF.
 589:      */
 590:     public static final UnicodeBlock HANGUL_JAMO
 591:       = new UnicodeBlock(0x1100, 0x11FF,
 592:                          "HANGUL_JAMO", 
 593:                          "Hangul Jamo");
 594: 
 595:     /**
 596:      * Ethiopic.
 597:      * 0x1200 - 0x137F.
 598:      * @since 1.4
 599:      */
 600:     public static final UnicodeBlock ETHIOPIC
 601:       = new UnicodeBlock(0x1200, 0x137F,
 602:                          "ETHIOPIC", 
 603:                          "Ethiopic");
 604: 
 605:     /**
 606:      * Cherokee.
 607:      * 0x13A0 - 0x13FF.
 608:      * @since 1.4
 609:      */
 610:     public static final UnicodeBlock CHEROKEE
 611:       = new UnicodeBlock(0x13A0, 0x13FF,
 612:                          "CHEROKEE", 
 613:                          "Cherokee");
 614: 
 615:     /**
 616:      * Unified Canadian Aboriginal Syllabics.
 617:      * 0x1400 - 0x167F.
 618:      * @since 1.4
 619:      */
 620:     public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
 621:       = new UnicodeBlock(0x1400, 0x167F,
 622:                          "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 
 623:                          "Unified Canadian Aboriginal Syllabics");
 624: 
 625:     /**
 626:      * Ogham.
 627:      * 0x1680 - 0x169F.
 628:      * @since 1.4
 629:      */
 630:     public static final UnicodeBlock OGHAM
 631:       = new UnicodeBlock(0x1680, 0x169F,
 632:                          "OGHAM", 
 633:                          "Ogham");
 634: 
 635:     /**
 636:      * Runic.
 637:      * 0x16A0 - 0x16FF.
 638:      * @since 1.4
 639:      */
 640:     public static final UnicodeBlock RUNIC
 641:       = new UnicodeBlock(0x16A0, 0x16FF,
 642:                          "RUNIC", 
 643:                          "Runic");
 644: 
 645:     /**
 646:      * Tagalog.
 647:      * 0x1700 - 0x171F.
 648:      * @since 1.5
 649:      */
 650:     public static final UnicodeBlock TAGALOG
 651:       = new UnicodeBlock(0x1700, 0x171F,
 652:                          "TAGALOG", 
 653:                          "Tagalog");
 654: 
 655:     /**
 656:      * Hanunoo.
 657:      * 0x1720 - 0x173F.
 658:      * @since 1.5
 659:      */
 660:     public static final UnicodeBlock HANUNOO
 661:       = new UnicodeBlock(0x1720, 0x173F,
 662:                          "HANUNOO", 
 663:                          "Hanunoo");
 664: 
 665:     /**
 666:      * Buhid.
 667:      * 0x1740 - 0x175F.
 668:      * @since 1.5
 669:      */
 670:     public static final UnicodeBlock BUHID
 671:       = new UnicodeBlock(0x1740, 0x175F,
 672:                          "BUHID", 
 673:                          "Buhid");
 674: 
 675:     /**
 676:      * Tagbanwa.
 677:      * 0x1760 - 0x177F.
 678:      * @since 1.5
 679:      */
 680:     public static final UnicodeBlock TAGBANWA
 681:       = new UnicodeBlock(0x1760, 0x177F,
 682:                          "TAGBANWA", 
 683:                          "Tagbanwa");
 684: 
 685:     /**
 686:      * Khmer.
 687:      * 0x1780 - 0x17FF.
 688:      * @since 1.4
 689:      */
 690:     public static final UnicodeBlock KHMER
 691:       = new UnicodeBlock(0x1780, 0x17FF,
 692:                          "KHMER", 
 693:                          "Khmer");
 694: 
 695:     /**
 696:      * Mongolian.
 697:      * 0x1800 - 0x18AF.
 698:      * @since 1.4
 699:      */
 700:     public static final UnicodeBlock MONGOLIAN
 701:       = new UnicodeBlock(0x1800, 0x18AF,
 702:                          "MONGOLIAN", 
 703:                          "Mongolian");
 704: 
 705:     /**
 706:      * Limbu.
 707:      * 0x1900 - 0x194F.
 708:      * @since 1.5
 709:      */
 710:     public static final UnicodeBlock LIMBU
 711:       = new UnicodeBlock(0x1900, 0x194F,
 712:                          "LIMBU", 
 713:                          "Limbu");
 714: 
 715:     /**
 716:      * Tai Le.
 717:      * 0x1950 - 0x197F.
 718:      * @since 1.5
 719:      */
 720:     public static final UnicodeBlock TAI_LE
 721:       = new UnicodeBlock(0x1950, 0x197F,
 722:                          "TAI_LE", 
 723:                          "Tai Le");
 724: 
 725:     /**
 726:      * Khmer Symbols.
 727:      * 0x19E0 - 0x19FF.
 728:      * @since 1.5
 729:      */
 730:     public static final UnicodeBlock KHMER_SYMBOLS
 731:       = new UnicodeBlock(0x19E0, 0x19FF,
 732:                          "KHMER_SYMBOLS", 
 733:                          "Khmer Symbols");
 734: 
 735:     /**
 736:      * Phonetic Extensions.
 737:      * 0x1D00 - 0x1D7F.
 738:      * @since 1.5
 739:      */
 740:     public static final UnicodeBlock PHONETIC_EXTENSIONS
 741:       = new UnicodeBlock(0x1D00, 0x1D7F,
 742:                          "PHONETIC_EXTENSIONS", 
 743:                          "Phonetic Extensions");
 744: 
 745:     /**
 746:      * Latin Extended Additional.
 747:      * 0x1E00 - 0x1EFF.
 748:      */
 749:     public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
 750:       = new UnicodeBlock(0x1E00, 0x1EFF,
 751:                          "LATIN_EXTENDED_ADDITIONAL", 
 752:                          "Latin Extended Additional");
 753: 
 754:     /**
 755:      * Greek Extended.
 756:      * 0x1F00 - 0x1FFF.
 757:      */
 758:     public static final UnicodeBlock GREEK_EXTENDED
 759:       = new UnicodeBlock(0x1F00, 0x1FFF,
 760:                          "GREEK_EXTENDED", 
 761:                          "Greek Extended");
 762: 
 763:     /**
 764:      * General Punctuation.
 765:      * 0x2000 - 0x206F.
 766:      */
 767:     public static final UnicodeBlock GENERAL_PUNCTUATION
 768:       = new UnicodeBlock(0x2000, 0x206F,
 769:                          "GENERAL_PUNCTUATION", 
 770:                          "General Punctuation");
 771: 
 772:     /**
 773:      * Superscripts and Subscripts.
 774:      * 0x2070 - 0x209F.
 775:      */
 776:     public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
 777:       = new UnicodeBlock(0x2070, 0x209F,
 778:                          "SUPERSCRIPTS_AND_SUBSCRIPTS", 
 779:                          "Superscripts and Subscripts");
 780: 
 781:     /**
 782:      * Currency Symbols.
 783:      * 0x20A0 - 0x20CF.
 784:      */
 785:     public static final UnicodeBlock CURRENCY_SYMBOLS
 786:       = new UnicodeBlock(0x20A0, 0x20CF,
 787:                          "CURRENCY_SYMBOLS", 
 788:                          "Currency Symbols");
 789: 
 790:     /**
 791:      * Combining Marks for Symbols.
 792:      * 0x20D0 - 0x20FF.
 793:      */
 794:     public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
 795:       = new UnicodeBlock(0x20D0, 0x20FF,
 796:                          "COMBINING_MARKS_FOR_SYMBOLS", 
 797:                          "Combining Marks for Symbols");
 798: 
 799:     /**
 800:      * Letterlike Symbols.
 801:      * 0x2100 - 0x214F.
 802:      */
 803:     public static final UnicodeBlock LETTERLIKE_SYMBOLS
 804:       = new UnicodeBlock(0x2100, 0x214F,
 805:                          "LETTERLIKE_SYMBOLS", 
 806:                          "Letterlike Symbols");
 807: 
 808:     /**
 809:      * Number Forms.
 810:      * 0x2150 - 0x218F.
 811:      */
 812:     public static final UnicodeBlock NUMBER_FORMS
 813:       = new UnicodeBlock(0x2150, 0x218F,
 814:                          "NUMBER_FORMS", 
 815:                          "Number Forms");
 816: 
 817:     /**
 818:      * Arrows.
 819:      * 0x2190 - 0x21FF.
 820:      */
 821:     public static final UnicodeBlock ARROWS
 822:       = new UnicodeBlock(0x2190, 0x21FF,
 823:                          "ARROWS", 
 824:                          "Arrows");
 825: 
 826:     /**
 827:      * Mathematical Operators.
 828:      * 0x2200 - 0x22FF.
 829:      */
 830:     public static final UnicodeBlock MATHEMATICAL_OPERATORS
 831:       = new UnicodeBlock(0x2200, 0x22FF,
 832:                          "MATHEMATICAL_OPERATORS", 
 833:                          "Mathematical Operators");
 834: 
 835:     /**
 836:      * Miscellaneous Technical.
 837:      * 0x2300 - 0x23FF.
 838:      */
 839:     public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
 840:       = new UnicodeBlock(0x2300, 0x23FF,
 841:                          "MISCELLANEOUS_TECHNICAL", 
 842:                          "Miscellaneous Technical");
 843: 
 844:     /**
 845:      * Control Pictures.
 846:      * 0x2400 - 0x243F.
 847:      */
 848:     public static final UnicodeBlock CONTROL_PICTURES
 849:       = new UnicodeBlock(0x2400, 0x243F,
 850:                          "CONTROL_PICTURES", 
 851:                          "Control Pictures");
 852: 
 853:     /**
 854:      * Optical Character Recognition.
 855:      * 0x2440 - 0x245F.
 856:      */
 857:     public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
 858:       = new UnicodeBlock(0x2440, 0x245F,
 859:                          "OPTICAL_CHARACTER_RECOGNITION", 
 860:                          "Optical Character Recognition");
 861: 
 862:     /**
 863:      * Enclosed Alphanumerics.
 864:      * 0x2460 - 0x24FF.
 865:      */
 866:     public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
 867:       = new UnicodeBlock(0x2460, 0x24FF,
 868:                          "ENCLOSED_ALPHANUMERICS", 
 869:                          "Enclosed Alphanumerics");
 870: 
 871:     /**
 872:      * Box Drawing.
 873:      * 0x2500 - 0x257F.
 874:      */
 875:     public static final UnicodeBlock BOX_DRAWING
 876:       = new UnicodeBlock(0x2500, 0x257F,
 877:                          "BOX_DRAWING", 
 878:                          "Box Drawing");
 879: 
 880:     /**
 881:      * Block Elements.
 882:      * 0x2580 - 0x259F.
 883:      */
 884:     public static final UnicodeBlock BLOCK_ELEMENTS
 885:       = new UnicodeBlock(0x2580, 0x259F,
 886:                          "BLOCK_ELEMENTS", 
 887:                          "Block Elements");
 888: 
 889:     /**
 890:      * Geometric Shapes.
 891:      * 0x25A0 - 0x25FF.
 892:      */
 893:     public static final UnicodeBlock GEOMETRIC_SHAPES
 894:       = new UnicodeBlock(0x25A0, 0x25FF,
 895:                          "GEOMETRIC_SHAPES", 
 896:                          "Geometric Shapes");
 897: 
 898:     /**
 899:      * Miscellaneous Symbols.
 900:      * 0x2600 - 0x26FF.
 901:      */
 902:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
 903:       = new UnicodeBlock(0x2600, 0x26FF,
 904:                          "MISCELLANEOUS_SYMBOLS", 
 905:                          "Miscellaneous Symbols");
 906: 
 907:     /**
 908:      * Dingbats.
 909:      * 0x2700 - 0x27BF.
 910:      */
 911:     public static final