GNU Classpath (0.95) | |
Frames | No Frames |
1: /* RuleBasedCollator.java -- Concrete Collator Class 2: Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc. 3: 4: This file is part of GNU Classpath. 5: 6: GNU Classpath is free software; you can redistribute it and/or modify 7: it under the terms of the GNU General Public License as published by 8: the Free Software Foundation; either version 2, or (at your option) 9: any later version. 10: 11: GNU Classpath is distributed in the hope that it will be useful, but 12: WITHOUT ANY WARRANTY; without even the implied warranty of 13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14: General Public License for more details. 15: 16: You should have received a copy of the GNU General Public License 17: along with GNU Classpath; see the file COPYING. If not, write to the 18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19: 02110-1301 USA. 20: 21: Linking this library statically or dynamically with other modules is 22: making a combined work based on this library. Thus, the terms and 23: conditions of the GNU General Public License cover the whole 24: combination. 25: 26: As a special exception, the copyright holders of this library give you 27: permission to link this library with independent modules to produce an 28: executable, regardless of the license terms of these independent 29: modules, and to copy and distribute the resulting executable under 30: terms of your choice, provided that you also meet, for each linked 31: independent module, the terms and conditions of the license of that 32: module. An independent module is a module which is not derived from 33: or based on this library. If you modify this library, you may extend 34: this exception to your version of the library, but you are not 35: obligated to do so. If you do not wish to do so, delete this 36: exception statement from your version. */ 37: 38: 39: package java.text; 40: 41: import gnu.classpath.NotImplementedException; 42: 43: import java.util.ArrayList; 44: import java.util.HashMap; 45: 46: /* Written using "Java Class Libraries", 2nd edition, plus online 47: * API docs for JDK 1.2 from http://www.javasoft.com. 48: * Status: Believed complete and correct 49: */ 50: 51: /** 52: * This class is a concrete subclass of <code>Collator</code> suitable 53: * for string collation in a wide variety of languages. An instance of 54: * this class is normally returned by the <code>getInstance</code> method 55: * of <code>Collator</code> with rules predefined for the requested 56: * locale. However, an instance of this class can be created manually 57: * with any desired rules. 58: * <p> 59: * Rules take the form of a <code>String</code> with the following syntax 60: * <ul> 61: * <li> Modifier: '@'</li> 62: * <li> Relation: '<' | ';' | ',' | '=' : <text></li> 63: * <li> Reset: '&' : <text></li> 64: * </ul> 65: * The modifier character indicates that accents sort backward as is the 66: * case with French. The modifier applies to all rules <b>after</b> 67: * the modifier but before the next primary sequence. If placed at the end 68: * of the sequence if applies to all unknown accented character. 69: * The relational operators specify how the text 70: * argument relates to the previous term. The relation characters have 71: * the following meanings: 72: * <ul> 73: * <li>'<' - The text argument is greater than the prior term at the primary 74: * difference level.</li> 75: * <li>';' - The text argument is greater than the prior term at the secondary 76: * difference level.</li> 77: * <li>',' - The text argument is greater than the prior term at the tertiary 78: * difference level.</li> 79: * <li>'=' - The text argument is equal to the prior term</li> 80: * </ul> 81: * <p> 82: * As for the text argument itself, this is any sequence of Unicode 83: * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F, 84: * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E. If these characters are 85: * desired, they must be enclosed in single quotes. If any whitespace is 86: * encountered, it is ignored. (For example, "a b" is equal to "ab"). 87: * <p> 88: * The reset operation inserts the following rule at the point where the 89: * text argument to it exists in the previously declared rule string. This 90: * makes it easy to add new rules to an existing string by simply including 91: * them in a reset sequence at the end. Note that the text argument, or 92: * at least the first character of it, must be present somewhere in the 93: * previously declared rules in order to be inserted properly. If this 94: * is not satisfied, a <code>ParseException</code> will be thrown. 95: * <p> 96: * This system of configuring <code>RuleBasedCollator</code> is needlessly 97: * complex and the people at Taligent who developed it (along with the folks 98: * at Sun who accepted it into the Java standard library) deserve a slow 99: * and agonizing death. 100: * <p> 101: * Here are a couple of example of rule strings: 102: * <p> 103: * "< a < b < c" - This string says that a is greater than b which is 104: * greater than c, with all differences being primary differences. 105: * <p> 106: * "< a,A < b,B < c,C" - This string says that 'A' is greater than 'a' with 107: * a tertiary strength comparison. Both 'b' and 'B' are greater than 'a' and 108: * 'A' during a primary strength comparison. But 'B' is greater than 'b' 109: * under a tertiary strength comparison. 110: * <p> 111: * "< a < c & a < b " - This sequence is identical in function to the 112: * "< a < b < c" rule string above. The '&' reset symbol indicates that 113: * the rule "< b" is to be inserted after the text argument "a" in the 114: * previous rule string segment. 115: * <p> 116: * "< a < b & y < z" - This is an error. The character 'y' does not appear 117: * anywhere in the previous rule string segment so the rule following the 118: * reset rule cannot be inserted. 119: * <p> 120: * "< a & A @ < e & E < f& F" - This sequence is equivalent to the following 121: * "< a & A < E & e < f & F". 122: * <p> 123: * For a description of the various comparison strength types, see the 124: * documentation for the <code>Collator</code> class. 125: * <p> 126: * As an additional complication to this already overly complex rule scheme, 127: * if any characters precede the first rule, these characters are considered 128: * ignorable. They will be treated as if they did not exist during 129: * comparisons. For example, "- < a < b ..." would make '-' an ignorable 130: * character such that the strings "high-tech" and "hightech" would 131: * be considered identical. 132: * <p> 133: * A <code>ParseException</code> will be thrown for any of the following 134: * conditions: 135: * <ul> 136: * <li>Unquoted punctuation characters in a text argument.</li> 137: * <li>A relational or reset operator not followed by a text argument</li> 138: * <li>A reset operator where the text argument is not present in 139: * the previous rule string section.</li> 140: * </ul> 141: * 142: * @author Aaron M. Renn (arenn@urbanophile.com) 143: * @author Tom Tromey (tromey@cygnus.com) 144: * @author Guilhem Lavaux (guilhem@kaffe.org) 145: */ 146: public class RuleBasedCollator extends Collator 147: { 148: /** 149: * This class describes what rank has a character (or a sequence of characters) 150: * in the lexicographic order. Each element in a rule has a collation element. 151: */ 152: static final class CollationElement 153: { 154: String key; 155: int primary; 156: short secondary; 157: short tertiary; 158: short equality; 159: boolean ignore; 160: String expansion; 161: 162: CollationElement(String key, int primary, short secondary, short tertiary, 163: short equality, String expansion, boolean ignore) 164: { 165: this.key = key; 166: this.primary = primary; 167: this.secondary = secondary; 168: this.tertiary = tertiary; 169: this.equality = equality; 170: this.ignore = ignore; 171: this.expansion = expansion; 172: } 173: 174: int getValue() 175: { 176: return (primary << 16) + (secondary << 8) + tertiary; 177: } 178: } 179: 180: /** 181: * Basic collation instruction (internal format) to build the series of 182: * collation elements. It contains an instruction which specifies the new 183: * state of the generator. The sequence of instruction should not contain 184: * RESET (it is used by 185: * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)}) 186: * as a temporary state while merging two sets of instructions. 187: */ 188: static final class CollationSorter 189: { 190: static final int GREATERP = 0; 191: static final int GREATERS = 1; 192: static final int GREATERT = 2; 193: static final int EQUAL = 3; 194: static final int RESET = 4; 195: static final int INVERSE_SECONDARY = 5; 196: 197: int comparisonType; 198: String textElement; 199: int hashText; 200: int offset; 201: boolean ignore; 202: 203: String expansionOrdering; 204: } 205: 206: /** 207: * This the the original rule string. 208: */ 209: private String rules; 210: 211: /** 212: * This is the table of collation element values 213: */ 214: private Object[] ce_table; 215: 216: /** 217: * Quick-prefix finder. 218: */ 219: HashMap prefix_tree; 220: 221: /** 222: * This is the value of the last sequence entered into 223: * <code>ce_table</code>. It is used to compute the 224: * ordering value of unspecified character. 225: */ 226: private int last_primary_value; 227: 228: /** 229: * This is the value of the last secondary sequence of the 230: * primary 0, entered into 231: * <code>ce_table</code>. It is used to compute the 232: * ordering value of an unspecified accented character. 233: */ 234: private int last_tertiary_value; 235: 236: /** 237: * This variable is true if accents need to be sorted 238: * in the other direction. 239: */ 240: private boolean inverseAccentComparison; 241: 242: /** 243: * This collation element is special to unknown sequence. 244: * The JDK uses it to mark and sort the characters which has 245: * no collation rules. 246: */ 247: static final CollationElement SPECIAL_UNKNOWN_SEQ = 248: new CollationElement("", (short) 32767, (short) 0, (short) 0, 249: (short) 0, null, false); 250: 251: /** 252: * This method initializes a new instance of <code>RuleBasedCollator</code> 253: * with the specified collation rules. Note that an application normally 254: * obtains an instance of <code>RuleBasedCollator</code> by calling the 255: * <code>getInstance</code> method of <code>Collator</code>. That method 256: * automatically loads the proper set of rules for the desired locale. 257: * 258: * @param rules The collation rule string. 259: * 260: * @exception ParseException If the rule string contains syntax errors. 261: */ 262: public RuleBasedCollator(String rules) throws ParseException 263: { 264: if (rules.equals("")) 265: throw new ParseException("empty rule set", 0); 266: 267: this.rules = rules; 268: 269: buildCollationVector(parseString(rules)); 270: buildPrefixAccess(); 271: } 272: 273: /** 274: * This method returns the number of common characters at the beginning 275: * of the string of the two parameters. 276: * 277: * @param prefix A string considered as a prefix to test against 278: * the other string. 279: * @param s A string to test the prefix against. 280: * @return The number of common characters. 281: */ 282: static int findPrefixLength(String prefix, String s) 283: { 284: int index; 285: int len = prefix.length(); 286: 287: for (index = 0; index < len && index < s.length(); ++index) 288: { 289: if (prefix.charAt(index) != s.charAt(index)) 290: return index; 291: } 292: 293: 294: return index; 295: } 296: 297: /** 298: * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods 299: * checks whether it is possible to find an anchor point for the rules to be merged and 300: * then insert them at that precise point. 301: * 302: * @param offset Offset in the string containing rules of the beginning of the rules 303: * being merged in. 304: * @param starter Text of the rules being merged. 305: * @param main Repository of all already parsed rules. 306: * @param patch Rules to be merged into the repository. 307: * @throws ParseException if it is impossible to find an anchor point for the new rules. 308: */ 309: private void mergeRules(int offset, String starter, ArrayList main, ArrayList patch) 310: throws ParseException 311: { 312: int insertion_point = -1; 313: int max_length = 0; 314: 315: /* We must check that no rules conflict with another already present. If it 316: * is the case delete the old rule. 317: */ 318: 319: /* For the moment good old O(N^2) algorithm. 320: */ 321: for (int i = 0; i < patch.size(); i++) 322: { 323: int j = 0; 324: 325: while (j < main.size()) 326: { 327: CollationSorter rule1 = (CollationSorter) patch.get(i); 328: CollationSorter rule2 = (CollationSorter) main.get(j); 329: 330: if (rule1.textElement.equals(rule2.textElement)) 331: main.remove(j); 332: else 333: j++; 334: } 335: } 336: 337: // Find the insertion point... O(N) 338: for (int i = 0; i < main.size(); i++) 339: { 340: CollationSorter sorter = (CollationSorter) main.get(i); 341: int length = findPrefixLength(starter, sorter.textElement); 342: 343: if (length > max_length) 344: { 345: max_length = length; 346: insertion_point = i+1; 347: } 348: } 349: 350: if (insertion_point < 0) 351: throw new ParseException("no insertion point found for " + starter, offset); 352: 353: if (max_length < starter.length()) 354: { 355: /* 356: * We need to expand the first entry. It must be sorted 357: * like if it was the reference key itself (like the spec 358: * said. So the first entry is special: the element is 359: * replaced by the specified text element for the sorting. 360: * This text replace the old one for comparisons. However 361: * to preserve the behaviour we replace the first key (corresponding 362: * to the found prefix) by a new code rightly ordered in the 363: * sequence. The rest of the subsequence must be appended 364: * to the end of the sequence. 365: */ 366: CollationSorter sorter = (CollationSorter) patch.get(0); 367: CollationSorter expansionPrefix = 368: (CollationSorter) main.get(insertion_point-1); 369: 370: sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element 371: 372: main.add(insertion_point, sorter); 373: 374: /* 375: * This is a new set of rules. Append to the list. 376: */ 377: patch.remove(0); 378: insertion_point++; 379: } 380: 381: // Now insert all elements of patch at the insertion point. 382: for (int i = 0; i < patch.size(); i++) 383: main.add(i+insertion_point, patch.get(i)); 384: } 385: 386: /** 387: * This method parses a string and build a set of sorting instructions. The parsing 388: * may only be partial on the case the rules are to be merged sometime later. 389: * 390: * @param stop_on_reset If this parameter is true then the parser stops when it 391: * encounters a reset instruction. In the other case, it tries to parse the subrules 392: * and merged it in the same repository. 393: * @param v Output vector for the set of instructions. 394: * @param base_offset Offset in the string to begin parsing. 395: * @param rules Rules to be parsed. 396: * @return -1 if the parser reached the end of the string, an integer representing the 397: * offset in the string at which it stopped parsing. 398: * @throws ParseException if something turned wrong during the parsing. To get details 399: * decode the message. 400: */ 401: private int subParseString(boolean stop_on_reset, ArrayList v, 402: int base_offset, String rules) 403: throws ParseException 404: { 405: boolean ignoreChars = (base_offset == 0); 406: int operator = -1; 407: StringBuffer sb = new StringBuffer(); 408: boolean doubleQuote = false; 409: boolean eatingChars = false; 410: boolean nextIsModifier = false; 411: boolean isModifier = false; 412: int i; 413: 414: main_parse_loop: 415: for (i = 0; i < rules.length(); i++) 416: { 417: char c = rules.charAt(i); 418: int type = -1; 419: 420: if (!eatingChars && 421: ((c >= 0x09 && c <= 0x0D) || (c == 0x20))) 422: continue; 423: 424: isModifier = nextIsModifier; 425: nextIsModifier = false; 426: 427: if (eatingChars && c != '\'') 428: { 429: doubleQuote = false; 430: sb.append(c); 431: continue; 432: } 433: if (doubleQuote && eatingChars) 434: { 435: sb.append(c); 436: doubleQuote = false; 437: continue; 438: } 439: 440: switch (c) 441: { 442: case '!': 443: throw new ParseException 444: ("Modifier '!' is not yet supported by Classpath", i + base_offset); 445: case '<': 446: type = CollationSorter.GREATERP; 447: break; 448: case ';': 449: type = CollationSorter.GREATERS; 450: break; 451: case ',': 452: type = CollationSorter.GREATERT; 453: break; 454: case '=': 455: type = CollationSorter.EQUAL; 456: break; 457: case '\'': 458: eatingChars = !eatingChars; 459: doubleQuote = true; 460: break; 461: case '@': 462: if (ignoreChars) 463: throw new ParseException 464: ("comparison list has not yet been started. You may only use" 465: + "(<,;=&)", i + base_offset); 466: // Inverse the order of secondaries from now on. 467: nextIsModifier = true; 468: type = CollationSorter.INVERSE_SECONDARY; 469: break; 470: case '&': 471: type = CollationSorter.RESET; 472: if (stop_on_reset) 473: break main_parse_loop; 474: break; 475: default: 476: if (operator < 0) 477: throw new ParseException 478: ("operator missing at " + (i + base_offset), i + base_offset); 479: if (! eatingChars 480: && ((c >= 0x21 && c <= 0x2F) 481: || (c >= 0x3A && c <= 0x40) 482: || (c >= 0x5B && c <= 0x60) 483: || (c >= 0x7B && c <= 0x7E))) 484: throw new ParseException 485: ("unquoted punctuation character '" + c + "'", i + base_offset); 486: 487: //type = ignoreChars ? CollationSorter.IGNORE : -1; 488: sb.append(c); 489: break; 490: } 491: 492: if (type < 0) 493: continue; 494: 495: if (operator < 0) 496: { 497: operator = type; 498: continue; 499: } 500: 501: if (sb.length() == 0 && !isModifier) 502: throw new ParseException 503: ("text element empty at " + (i+base_offset), i+base_offset); 504: 505: if (operator == CollationSorter.RESET) 506: { 507: /* Reposition in the sorting list at the position 508: * indicated by the text element. 509: */ 510: String subrules = rules.substring(i); 511: ArrayList sorted_rules = new ArrayList(); 512: int idx; 513: 514: // Parse the subrules but do not iterate through all 515: // sublist. This is the privilege of the first call. 516: idx = subParseString(true, sorted_rules, base_offset+i, subrules); 517: 518: // Merge new parsed rules into the list. 519: mergeRules(base_offset+i, sb.toString(), v, sorted_rules); 520: sb.setLength(0); 521: 522: // Reset state to none. 523: operator = -1; 524: type = -1; 525: // We have found a new subrule at 'idx' but it has not been parsed. 526: if (idx >= 0) 527: { 528: i += idx-1; 529: continue main_parse_loop; 530: } 531: else 532: // No more rules. 533: break main_parse_loop; 534: } 535: 536: CollationSorter sorter = new CollationSorter(); 537: 538: if (operator == CollationSorter.GREATERP) 539: ignoreChars = false; 540: 541: sorter.comparisonType = operator; 542: sorter.textElement = sb.toString(); 543: sorter.hashText = sorter.textElement.hashCode(); 544: sorter.offset = base_offset+rules.length(); 545: sorter.ignore = ignoreChars; 546: sb.setLength(0); 547: 548: v.add(sorter); 549: operator = type; 550: } 551: 552: if (operator >= 0) 553: { 554: CollationSorter sorter = new CollationSorter(); 555: int pos = rules.length() + base_offset; 556: 557: if ((sb.length() != 0 && nextIsModifier) 558: || (sb.length() == 0 && !nextIsModifier && !eatingChars)) 559: throw new ParseException("text element empty at " + pos, pos); 560: 561: if (operator == CollationSorter.GREATERP) 562: ignoreChars = false; 563: 564: sorter.comparisonType = operator; 565: sorter.textElement = sb.toString(); 566: sorter.hashText = sorter.textElement.hashCode(); 567: sorter.offset = base_offset+pos; 568: sorter.ignore = ignoreChars; 569: v.add(sorter); 570: } 571: 572: if (i == rules.length()) 573: return -1; 574: else 575: return i; 576: } 577: 578: /** 579: * This method creates a copy of this object. 580: * 581: * @return A copy of this object. 582: */ 583: public Object clone() 584: { 585: return super.clone(); 586: } 587: 588: /** 589: * This method completely parses a string 'rules' containing sorting rules. 590: * 591: * @param rules String containing the rules to be parsed. 592: * @return A set of sorting instructions stored in a Vector. 593: * @throws ParseException if something turned wrong during the parsing. To get details 594: * decode the message. 595: */ 596: private ArrayList parseString(String rules) 597: throws ParseException 598: { 599: ArrayList v = new ArrayList(); 600: 601: // result of the first subParseString is not absolute (may be -1 or a 602: // positive integer). But we do not care. 603: subParseString(false, v, 0, rules); 604: 605: return v; 606: } 607: 608: /** 609: * This method uses the sorting instructions built by {@link #parseString} 610: * to build collation elements which can be directly used to sort strings. 611: * 612: * @param parsedElements Parsed instructions stored in a ArrayList. 613: * @throws ParseException if the order of the instructions are not valid. 614: */ 615: private void buildCollationVector(ArrayList parsedElements) 616: throws ParseException 617: { 618: int primary_seq = 0; 619: int last_tertiary_seq = 0; 620: short secondary_seq = 0; 621: short tertiary_seq = 0; 622: short equality_seq = 0; 623: boolean inverseComparisons = false; 624: final boolean DECREASING = false; 625: final boolean INCREASING = true; 626: boolean secondaryType = INCREASING; 627: ArrayList v = new ArrayList(); 628: 629: // elts is completely sorted. 630: element_loop: 631: for (int i = 0; i < parsedElements.size(); i++) 632: { 633: CollationSorter elt = (CollationSorter) parsedElements.get(i); 634: boolean ignoreChar = false; 635: 636: switch (elt.comparisonType) 637: { 638: case CollationSorter.GREATERP: 639: primary_seq++; 640: if (inverseComparisons) 641: { 642: secondary_seq = Short.MAX_VALUE; 643: secondaryType = DECREASING; 644: } 645: else 646: { 647: secondary_seq = 0; 648: secondaryType = INCREASING; 649: } 650: tertiary_seq = 0; 651: equality_seq = 0; 652: inverseComparisons = false; 653: break; 654: case CollationSorter.GREATERS: 655: if (secondaryType == DECREASING) 656: secondary_seq--; 657: else 658: secondary_seq++; 659: tertiary_seq = 0; 660: equality_seq = 0; 661: break; 662: case CollationSorter.INVERSE_SECONDARY: 663: inverseComparisons = true; 664: continue element_loop; 665: case CollationSorter.GREATERT: 666: tertiary_seq++; 667: if (primary_seq == 0) 668: last_tertiary_seq = tertiary_seq; 669: equality_seq = 0; 670: break; 671: case CollationSorter.EQUAL: 672: equality_seq++; 673: break; 674: case CollationSorter.RESET: 675: throw new ParseException 676: ("Invalid reached state 'RESET'. Internal error", elt.offset); 677: default: 678: throw new ParseException 679: ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset); 680: } 681: 682: v.add(new CollationElement(elt.textElement, primary_seq, 683: secondary_seq, tertiary_seq, 684: equality_seq, elt.expansionOrdering, elt.ignore)); 685: } 686: 687: this.inverseAccentComparison = inverseComparisons; 688: 689: ce_table = v.toArray(); 690: 691: last_primary_value = primary_seq+1; 692: last_tertiary_value = last_tertiary_seq+1; 693: } 694: 695: /** 696: * Build a tree where all keys are the texts of collation elements and data is 697: * the collation element itself. The tree is used when extracting all prefix 698: * for a given text. 699: */ 700: private void buildPrefixAccess() 701: { 702: prefix_tree = new HashMap(); 703: 704: for (int i = 0; i < ce_table.length; i++) 705: { 706: CollationElement e = (CollationElement) ce_table[i]; 707: 708: prefix_tree.put(e.key, e); 709: } 710: } 711: 712: /** 713: * This method returns an integer which indicates whether the first 714: * specified <code>String</code> is less than, greater than, or equal to 715: * the second. The value depends not only on the collation rules in 716: * effect, but also the strength and decomposition settings of this object. 717: * 718: * @param source The first <code>String</code> to compare. 719: * @param target A second <code>String</code> to compare to the first. 720: * 721: * @return A negative integer if source < target, a positive integer 722: * if source > target, or 0 if source == target. 723: */ 724: public int compare(String source, String target) 725: { 726: CollationElementIterator cs, ct; 727: CollationElement ord1block = null; 728: CollationElement ord2block = null; 729: boolean advance_block_1 = true; 730: boolean advance_block_2 = true; 731: 732: cs = getCollationElementIterator(source); 733: ct = getCollationElementIterator(target); 734: 735: for(;;) 736: { 737: int ord1; 738: int ord2; 739: 740: /* 741: * We have to check whether the characters are ignorable. 742: * If it is the case then forget them. 743: */ 744: if (advance_block_1) 745: { 746: ord1block = cs.nextBlock(); 747: if (ord1block != null && ord1block.ignore) 748: continue; 749: } 750: 751: if (advance_block_2) 752: { 753: ord2block = ct.nextBlock(); 754: if (ord2block != null && ord2block.ignore) 755: { 756: advance_block_1 = false; 757: continue; 758: } 759: } 760: else 761: advance_block_2 = true; 762: 763: if (!advance_block_1) 764: advance_block_1 = true; 765: 766: if (ord1block != null) 767: ord1 = ord1block.getValue(); 768: else 769: { 770: if (ord2block == null) 771: return 0; 772: return -1; 773: } 774: 775: if (ord2block == null) 776: return 1; 777: 778: ord2 = ord2block.getValue(); 779: 780: // We know chars are totally equal, so skip 781: if (ord1 == ord2) 782: { 783: if (getStrength() == IDENTICAL) 784: if (!ord1block.key.equals(ord2block.key)) 785: return ord1block.key.compareTo(ord2block.key); 786: continue; 787: } 788: 789: // Check for primary strength differences 790: int prim1 = CollationElementIterator.primaryOrder(ord1); 791: int prim2 = CollationElementIterator.primaryOrder(ord2); 792: 793: if (prim1 == 0 && getStrength() < TERTIARY) 794: { 795: advance_block_2 = false; 796: continue; 797: } 798: else if (prim2 == 0 && getStrength() < TERTIARY) 799: { 800: advance_block_1 = false; 801: continue; 802: } 803: 804: if (prim1 < prim2) 805: return -1; 806: else if (prim1 > prim2) 807: return 1; 808: else if (getStrength() == PRIMARY) 809: continue; 810: 811: // Check for secondary strength differences 812: int sec1 = CollationElementIterator.secondaryOrder(ord1); 813: int sec2 = CollationElementIterator.secondaryOrder(ord2); 814: 815: if (sec1 < sec2) 816: return -1; 817: else if (sec1 > sec2) 818: return 1; 819: else if (getStrength() == SECONDARY) 820: continue; 821: 822: // Check for tertiary differences 823: int tert1 = CollationElementIterator.tertiaryOrder(ord1); 824: int tert2 = CollationElementIterator.tertiaryOrder(ord2); 825: 826: if (tert1 < tert2) 827: return -1; 828: else if (tert1 > tert2) 829: return 1; 830: else if (getStrength() == TERTIARY) 831: continue; 832: 833: // Apparently JDK does this (at least for my test case). 834: return ord1block.key.compareTo(ord2block.key); 835: } 836: } 837: 838: /** 839: * This method tests this object for equality against the specified 840: * object. This will be true if and only if the specified object is 841: * another reference to this object. 842: * 843: * @param obj The <code>Object</code> to compare against this object. 844: * 845: * @return <code>true</code> if the specified object is equal to this object, 846: * <code>false</code> otherwise. 847: */ 848: public boolean equals(Object obj) 849: { 850: if (obj == this) 851: return true; 852: else 853: return false; 854: } 855: 856: /** 857: * This method builds a default collation element without invoking 858: * the database created from the rules passed to the constructor. 859: * 860: * @param c Character which needs a collation element. 861: * @return A valid brand new CollationElement instance. 862: */ 863: CollationElement getDefaultElement(char c) 864: { 865: int v; 866: 867: // Preliminary support for generic accent sorting inversion (I don't know if all 868: // characters in the range should be sorted backward). This is the place 869: // to fix this if needed. 870: if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361)) 871: v = 0x0361 - ((int) c - 0x02B9); 872: else 873: v = (short) c; 874: return new CollationElement("" + c, last_primary_value + v, 875: (short) 0, (short) 0, (short) 0, null, false); 876: } 877: 878: /** 879: * This method builds a default collation element for an accented character 880: * without invoking the database created from the rules passed to the constructor. 881: * 882: * @param c Character which needs a collation element. 883: * @return A valid brand new CollationElement instance. 884: */ 885: CollationElement getDefaultAccentedElement(char c) 886: { 887: int v; 888: 889: // Preliminary support for generic accent sorting inversion (I don't know if all 890: // characters in the range should be sorted backward). This is the place 891: // to fix this if needed. 892: if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361)) 893: v = 0x0361 - ((int) c - 0x02B9); 894: else 895: v = (short) c; 896: return new CollationElement("" + c, (short) 0, 897: (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false); 898: } 899: 900: /** 901: * This method returns an instance for <code>CollationElementIterator</code> 902: * for the specified <code>String</code> under the collation rules for this 903: * object. 904: * 905: * @param source The <code>String</code> to return the 906: * <code>CollationElementIterator</code> instance for. 907: * 908: * @return A <code>CollationElementIterator</code> for the specified 909: * <code>String</code>. 910: */ 911: public CollationElementIterator getCollationElementIterator(String source) 912: { 913: return new CollationElementIterator(this, source); 914: } 915: 916: /** 917: * This method returns an instance of <code>CollationElementIterator</code> 918: * for the <code>String</code> represented by the specified 919: * <code>CharacterIterator</code>. 920: * 921: * @param source The <code>CharacterIterator</code> with the desired <code>String</code>. 922: * 923: * @return A <code>CollationElementIterator</code> for the specified <code>String</code>. 924: */ 925: public CollationElementIterator getCollationElementIterator(CharacterIterator source) 926: throws NotImplementedException // Because decomposeCharacter does not work 927: { 928: StringBuffer expand = new StringBuffer(""); 929: 930: // Right now we assume that we will read from the beginning of the string. 931: for (char c = source.first(); 932: c != CharacterIterator.DONE; 933: c = source.next()) 934: decomposeCharacter(c, expand); 935: 936: return getCollationElementIterator(expand.toString()); 937: } 938: 939: /** 940: * This method returns an instance of <code>CollationKey</code> for the 941: * specified <code>String</code>. The object returned will have a 942: * more efficient mechanism for its comparison function that could 943: * provide speed benefits if multiple comparisons are performed, such 944: * as during a sort. 945: * 946: * @param source The <code>String</code> to create a <code>CollationKey</code> for. 947: * 948: * @return A <code>CollationKey</code> for the specified <code>String</code>. 949: */ 950: public CollationKey getCollationKey(String source) 951: { 952: CollationElementIterator cei = getCollationElementIterator(source); 953: ArrayList vect = new ArrayList(); 954: 955: int ord = cei.next(); 956: cei.reset(); //set to start of string 957: 958: while (ord != CollationElementIterator.NULLORDER) 959: { 960: // If the primary order is null, it means this is an ignorable 961: // character. 962: if (CollationElementIterator.primaryOrder(ord) == 0) 963: { 964: ord = cei.next(); 965: continue; 966: } 967: switch (getStrength()) 968: { 969: case PRIMARY: 970: ord = CollationElementIterator.primaryOrder(ord); 971: break; 972: 973: case SECONDARY: 974: ord = CollationElementIterator.primaryOrder(ord) << 8; 975: ord |= CollationElementIterator.secondaryOrder(ord); 976: 977: default: 978: break; 979: } 980: 981: vect.add(new Integer(ord)); 982: ord = cei.next(); //increment to next key 983: } 984: 985: Object[] objarr = vect.toArray(); 986: byte[] key = new byte[objarr.length * 4]; 987: 988: for (int i = 0; i < objarr.length; i++) 989: { 990: int j = ((Integer) objarr[i]).intValue(); 991: key [i * 4] = (byte) ((j & 0xFF000000) >> 24); 992: key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16); 993: key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8); 994: key [i * 4 + 3] = (byte) (j & 0x000000FF); 995: } 996: 997: return new CollationKey(this, source, key); 998: } 999: 1000: /** 1001: * This method returns a <code>String</code> containing the collation rules 1002: * for this object. 1003: * 1004: * @return The collation rules for this object. 1005: */ 1006: public String getRules() 1007: { 1008: return rules; 1009: } 1010: 1011: /** 1012: * This method returns a hash value for this object. 1013: * 1014: * @return A hash value for this object. 1015: */ 1016: public int hashCode() 1017: { 1018: return System.identityHashCode(this); 1019: } 1020: }
GNU Classpath (0.95) |