1:
37:
38:
39: package ;
40:
41: import ;
42:
43: import ;
44: import ;
45: import ;
46:
47:
73: public final class Character implements Serializable, Comparable<Character>
74: {
75:
82: public static class Subset
83: {
84:
85: private final String name;
86:
87:
93: protected Subset(String name)
94: {
95:
96: this.name = name.toString();
97: }
98:
99:
107: public final boolean equals(Object o)
108: {
109: return o == this;
110: }
111:
112:
118: public final int hashCode()
119: {
120: return super.hashCode();
121: }
122:
123:
128: public final String toString()
129: {
130: return name;
131: }
132: }
133:
134:
147: public static final class UnicodeBlock extends Subset
148: {
149:
150: private final int start;
151:
152:
153: private final int end;
154:
155:
156: private final String canonicalName;
157:
158:
159: private enum NameType { CANONICAL, NO_SPACES, CONSTANT; }
160:
161:
/**
 * Creates a block covering the code points {@code start} through
 * {@code end}.
 *
 * @param start the first code point of the block
 * @param end the last code point of the block
 * @param name the name passed on to {@code Subset}, and therefore the
 *        value returned by {@code toString()}
 * @param canonicalName the human-readable name of the block
 */
private UnicodeBlock(int start, int end, String name,
                     String canonicalName)
{
  super(name);
  this.canonicalName = canonicalName;
  this.start = start;
  this.end = end;
}
178:
179:
188: public static UnicodeBlock of(char ch)
189: {
190: return of((int) ch);
191: }
192:
193:
202: public static UnicodeBlock of(int codePoint)
203: {
204: if (codePoint > MAX_CODE_POINT)
205: throw new IllegalArgumentException("The supplied integer value is " +
206: "too large to be a codepoint.");
207:
208: int low = 0;
209: int hi = sets.length - 1;
210: while (low <= hi)
211: {
212: int mid = (low + hi) >> 1;
213: UnicodeBlock b = sets[mid];
214: if (codePoint < b.start)
215: hi = mid - 1;
216: else if (codePoint > b.end)
217: low = mid + 1;
218: else
219: return b;
220: }
221: return null;
222: }
223:
224:
258: public static final UnicodeBlock forName(String blockName)
259: {
260: NameType type;
261: if (blockName.indexOf(' ') != -1)
262: type = NameType.CANONICAL;
263: else if (blockName.indexOf('_') != -1)
264: type = NameType.CONSTANT;
265: else
266: type = NameType.NO_SPACES;
267: Collator usCollator = Collator.getInstance(Locale.US);
268: usCollator.setStrength(Collator.PRIMARY);
269:
270: switch (type)
271: {
272: case CANONICAL:
273: if (usCollator.compare(blockName, "Surrogates Area") == 0)
274: return SURROGATES_AREA;
275: break;
276: case NO_SPACES:
277: if (usCollator.compare(blockName, "SurrogatesArea") == 0)
278: return SURROGATES_AREA;
279: break;
280: case CONSTANT:
281: if (usCollator.compare(blockName, "SURROGATES_AREA") == 0)
282: return SURROGATES_AREA;
283: break;
284: }
285:
286: switch (type)
287: {
288: case CANONICAL:
289: for (UnicodeBlock block : sets)
290: if (usCollator.compare(blockName, block.canonicalName) == 0)
291: return block;
292: break;
293: case NO_SPACES:
294: for (UnicodeBlock block : sets)
295: {
296: String nsName = block.canonicalName.replaceAll(" ","");
297: if (usCollator.compare(blockName, nsName) == 0)
298: return block;
299: }
300: break;
301: case CONSTANT:
302: for (UnicodeBlock block : sets)
303: if (usCollator.compare(blockName, block.toString()) == 0)
304: return block;
305: break;
306: }
307: throw new IllegalArgumentException("No Unicode block found for " +
308: blockName + ".");
309: }
310:
311:
315: public static final UnicodeBlock BASIC_LATIN
316: = new UnicodeBlock(0x0000, 0x007F,
317: "BASIC_LATIN",
318: "Basic Latin");
319:
320:
324: public static final UnicodeBlock LATIN_1_SUPPLEMENT
325: = new UnicodeBlock(0x0080, 0x00FF,
326: "LATIN_1_SUPPLEMENT",
327: "Latin-1 Supplement");
328:
329:
333: public static final UnicodeBlock LATIN_EXTENDED_A
334: = new UnicodeBlock(0x0100, 0x017F,
335: "LATIN_EXTENDED_A",
336: "Latin Extended-A");
337:
338:
342: public static final UnicodeBlock LATIN_EXTENDED_B
343: = new UnicodeBlock(0x0180, 0x024F,
344: "LATIN_EXTENDED_B",
345: "Latin Extended-B");
346:
347:
351: public static final UnicodeBlock IPA_EXTENSIONS
352: = new UnicodeBlock(0x0250, 0x02AF,
353: "IPA_EXTENSIONS",
354: "IPA Extensions");
355:
356:
360: public static final UnicodeBlock SPACING_MODIFIER_LETTERS
361: = new UnicodeBlock(0x02B0, 0x02FF,
362: "SPACING_MODIFIER_LETTERS",
363: "Spacing Modifier Letters");
364:
365:
369: public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
370: = new UnicodeBlock(0x0300, 0x036F,
371: "COMBINING_DIACRITICAL_MARKS",
372: "Combining Diacritical Marks");
373:
374:
378: public static final UnicodeBlock GREEK
379: = new UnicodeBlock(0x0370, 0x03FF,
380: "GREEK",
381: "Greek");
382:
383:
387: public static final UnicodeBlock CYRILLIC
388: = new UnicodeBlock(0x0400, 0x04FF,
389: "CYRILLIC",
390: "Cyrillic");
391:
392:
397: public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
398: = new UnicodeBlock(0x0500, 0x052F,
399: "CYRILLIC_SUPPLEMENTARY",
400: "Cyrillic Supplementary");
401:
402:
406: public static final UnicodeBlock ARMENIAN
407: = new UnicodeBlock(0x0530, 0x058F,
408: "ARMENIAN",
409: "Armenian");
410:
411:
415: public static final UnicodeBlock HEBREW
416: = new UnicodeBlock(0x0590, 0x05FF,
417: "HEBREW",
418: "Hebrew");
419:
420:
424: public static final UnicodeBlock ARABIC
425: = new UnicodeBlock(0x0600, 0x06FF,
426: "ARABIC",
427: "Arabic");
428:
429:
434: public static final UnicodeBlock SYRIAC
435: = new UnicodeBlock(0x0700, 0x074F,
436: "SYRIAC",
437: "Syriac");
438:
439:
444: public static final UnicodeBlock THAANA
445: = new UnicodeBlock(0x0780, 0x07BF,
446: "THAANA",
447: "Thaana");
448:
449:
453: public static final UnicodeBlock DEVANAGARI
454: = new UnicodeBlock(0x0900, 0x097F,
455: "DEVANAGARI",
456: "Devanagari");
457:
458:
462: public static final UnicodeBlock BENGALI
463: = new UnicodeBlock(0x0980, 0x09FF,
464: "BENGALI",
465: "Bengali");
466:
467:
471: public static final UnicodeBlock GURMUKHI
472: = new UnicodeBlock(0x0A00, 0x0A7F,
473: "GURMUKHI",
474: "Gurmukhi");
475:
476:
480: public static final UnicodeBlock GUJARATI
481: = new UnicodeBlock(0x0A80, 0x0AFF,
482: "GUJARATI",
483: "Gujarati");
484:
485:
489: public static final UnicodeBlock ORIYA
490: = new UnicodeBlock(0x0B00, 0x0B7F,
491: "ORIYA",
492: "Oriya");
493:
494:
498: public static final UnicodeBlock TAMIL
499: = new UnicodeBlock(0x0B80, 0x0BFF,
500: "TAMIL",
501: "Tamil");
502:
503:
507: public static final UnicodeBlock TELUGU
508: = new UnicodeBlock(0x0C00, 0x0C7F,
509: "TELUGU",
510: "Telugu");
511:
512:
516: public static final UnicodeBlock KANNADA
517: = new UnicodeBlock(0x0C80, 0x0CFF,
518: "KANNADA",
519: "Kannada");
520:
521:
525: public static final UnicodeBlock MALAYALAM
526: = new UnicodeBlock(0x0D00, 0x0D7F,
527: "MALAYALAM",
528: "Malayalam");
529:
530:
535: public static final UnicodeBlock SINHALA
536: = new UnicodeBlock(0x0D80, 0x0DFF,
537: "SINHALA",
538: "Sinhala");
539:
540:
544: public static final UnicodeBlock THAI
545: = new UnicodeBlock(0x0E00, 0x0E7F,
546: "THAI",
547: "Thai");
548:
549:
553: public static final UnicodeBlock LAO
554: = new UnicodeBlock(0x0E80, 0x0EFF,
555: "LAO",
556: "Lao");
557:
558:
562: public static final UnicodeBlock TIBETAN
563: = new UnicodeBlock(0x0F00, 0x0FFF,
564: "TIBETAN",
565: "Tibetan");
566:
567:
572: public static final UnicodeBlock MYANMAR
573: = new UnicodeBlock(0x1000, 0x109F,
574: "MYANMAR",
575: "Myanmar");
576:
577:
581: public static final UnicodeBlock GEORGIAN
582: = new UnicodeBlock(0x10A0, 0x10FF,
583: "GEORGIAN",
584: "Georgian");
585:
586:
590: public static final UnicodeBlock HANGUL_JAMO
591: = new UnicodeBlock(0x1100, 0x11FF,
592: "HANGUL_JAMO",
593: "Hangul Jamo");
594:
595:
600: public static final UnicodeBlock ETHIOPIC
601: = new UnicodeBlock(0x1200, 0x137F,
602: "ETHIOPIC",
603: "Ethiopic");
604:
605:
610: public static final UnicodeBlock CHEROKEE
611: = new UnicodeBlock(0x13A0, 0x13FF,
612: "CHEROKEE",
613: "Cherokee");
614:
615:
620: public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
621: = new UnicodeBlock(0x1400, 0x167F,
622: "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
623: "Unified Canadian Aboriginal Syllabics");
624:
625:
630: public static final UnicodeBlock OGHAM
631: = new UnicodeBlock(0x1680, 0x169F,
632: "OGHAM",
633: "Ogham");
634:
635:
640: public static final UnicodeBlock RUNIC
641: = new UnicodeBlock(0x16A0, 0x16FF,
642: "RUNIC",
643: "Runic");
644:
645:
650: public static final UnicodeBlock TAGALOG
651: = new UnicodeBlock(0x1700, 0x171F,
652: "TAGALOG",
653: "Tagalog");
654:
655:
660: public static final UnicodeBlock HANUNOO
661: = new UnicodeBlock(0x1720, 0x173F,
662: "HANUNOO",
663: "Hanunoo");
664:
665:
670: public static final UnicodeBlock BUHID
671: = new UnicodeBlock(0x1740, 0x175F,
672: "BUHID",
673: "Buhid");
674:
675:
680: public static final UnicodeBlock TAGBANWA
681: = new UnicodeBlock(0x1760, 0x177F,
682: "TAGBANWA",
683: "Tagbanwa");
684:
685:
690: public static final UnicodeBlock KHMER
691: = new UnicodeBlock(0x1780, 0x17FF,
692: "KHMER",
693: "Khmer");
694:
695:
700: public static final UnicodeBlock MONGOLIAN
701: = new UnicodeBlock(0x1800, 0x18AF,
702: "MONGOLIAN",
703: "Mongolian");
704:
705:
710: public static final UnicodeBlock LIMBU
711: = new UnicodeBlock(0x1900, 0x194F,
712: "LIMBU",
713: "Limbu");
714:
715:
720: public static final UnicodeBlock TAI_LE
721: = new UnicodeBlock(0x1950, 0x197F,
722: "TAI_LE",
723: "Tai Le");
724:
725:
730: public static final UnicodeBlock KHMER_SYMBOLS
731: = new UnicodeBlock(0x19E0, 0x19FF,
732: "KHMER_SYMBOLS",
733: "Khmer Symbols");
734:
735:
740: public static final UnicodeBlock PHONETIC_EXTENSIONS
741: = new UnicodeBlock(0x1D00, 0x1D7F,
742: "PHONETIC_EXTENSIONS",
743: "Phonetic Extensions");
744:
745:
749: public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
750: = new UnicodeBlock(0x1E00, 0x1EFF,
751: "LATIN_EXTENDED_ADDITIONAL",
752: "Latin Extended Additional");
753:
754:
758: public static final UnicodeBlock GREEK_EXTENDED
759: = new UnicodeBlock(0x1F00, 0x1FFF,
760: "GREEK_EXTENDED",
761: "Greek Extended");
762:
763:
767: public static final UnicodeBlock GENERAL_PUNCTUATION
768: = new UnicodeBlock(0x2000, 0x206F,
769: "GENERAL_PUNCTUATION",
770: "General Punctuation");
771:
772:
776: public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
777: = new UnicodeBlock(0x2070, 0x209F,
778: "SUPERSCRIPTS_AND_SUBSCRIPTS",
779: "Superscripts and Subscripts");
780:
781:
785: public static final UnicodeBlock CURRENCY_SYMBOLS
786: = new UnicodeBlock(0x20A0, 0x20CF,
787: "CURRENCY_SYMBOLS",
788: "Currency Symbols");
789:
790:
794: public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
795: = new UnicodeBlock(0x20D0, 0x20FF,
796: "COMBINING_MARKS_FOR_SYMBOLS",
797: "Combining Marks for Symbols");
798:
799:
803: public static final UnicodeBlock LETTERLIKE_SYMBOLS
804: = new UnicodeBlock(0x2100, 0x214F,
805: "LETTERLIKE_SYMBOLS",
806: "Letterlike Symbols");
807:
808:
812: public static final UnicodeBlock NUMBER_FORMS
813: = new UnicodeBlock(0x2150, 0x218F,
814: "NUMBER_FORMS",
815: "Number Forms");
816:
817:
821: public static final UnicodeBlock ARROWS
822: = new UnicodeBlock(0x2190, 0x21FF,
823: "ARROWS",
824: "Arrows");
825:
826:
830: public static final UnicodeBlock MATHEMATICAL_OPERATORS
831: = new UnicodeBlock(0x2200, 0x22FF,
832: "MATHEMATICAL_OPERATORS",
833: "Mathematical Operators");
834:
835:
839: public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
840: = new UnicodeBlock(0x2300, 0x23FF,
841: "MISCELLANEOUS_TECHNICAL",
842: "Miscellaneous Technical");
843:
844:
848: public static final UnicodeBlock CONTROL_PICTURES
849: = new UnicodeBlock(0x2400, 0x243F,
850: "CONTROL_PICTURES",
851: "Control Pictures");
852:
853:
857: public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
858: = new UnicodeBlock(0x2440, 0x245F,
859: "OPTICAL_CHARACTER_RECOGNITION",
860: "Optical Character Recognition");
861:
862:
866: public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
867: = new UnicodeBlock(0x2460, 0x24FF,
868: "ENCLOSED_ALPHANUMERICS",
869: "Enclosed Alphanumerics");
870:
871:
875: public static final UnicodeBlock BOX_DRAWING
876: = new UnicodeBlock(0x2500, 0x257F,
877: "BOX_DRAWING",
878: "Box Drawing");
879:
880:
884: public static final UnicodeBlock BLOCK_ELEMENTS
885: = new UnicodeBlock(0x2580, 0x259F,
886: "BLOCK_ELEMENTS",
887: "Block Elements");
888:
889:
893: public static final UnicodeBlock GEOMETRIC_SHAPES
894: = new UnicodeBlock(0x25A0, 0x25FF,
895: "GEOMETRIC_SHAPES",
896: "Geometric Shapes");
897:
898:
902: public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
903: = new UnicodeBlock(0x2600, 0x26FF,
904: "MISCELLANEOUS_SYMBOLS",
905: "Miscellaneous Symbols");
906:
907:
911: public static final