GNU Classpath (0.95) | |
Frames | No Frames |
1: /* String.java -- immutable character sequences; the object of string literals 2: Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 3: Free Software Foundation, Inc. 4: 5: This file is part of GNU Classpath. 6: 7: GNU Classpath is free software; you can redistribute it and/or modify 8: it under the terms of the GNU General Public License as published by 9: the Free Software Foundation; either version 2, or (at your option) 10: any later version. 11: 12: GNU Classpath is distributed in the hope that it will be useful, but 13: WITHOUT ANY WARRANTY; without even the implied warranty of 14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15: General Public License for more details. 16: 17: You should have received a copy of the GNU General Public License 18: along with GNU Classpath; see the file COPYING. If not, write to the 19: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 20: 02110-1301 USA. 21: 22: Linking this library statically or dynamically with other modules is 23: making a combined work based on this library. Thus, the terms and 24: conditions of the GNU General Public License cover the whole 25: combination. 26: 27: As a special exception, the copyright holders of this library give you 28: permission to link this library with independent modules to produce an 29: executable, regardless of the license terms of these independent 30: modules, and to copy and distribute the resulting executable under 31: terms of your choice, provided that you also meet, for each linked 32: independent module, the terms and conditions of the license of that 33: module. An independent module is a module which is not derived from 34: or based on this library. If you modify this library, you may extend 35: this exception to your version of the library, but you are not 36: obligated to do so. If you do not wish to do so, delete this 37: exception statement from your version. */ 38: 39: 40: package java.lang; 41: 42: import gnu.java.lang.CharData; 43: 44: import java.io.Serializable; 45: import java.io.UnsupportedEncodingException; 46: import java.nio.ByteBuffer; 47: import java.nio.CharBuffer; 48: import java.nio.charset.CharacterCodingException; 49: import java.nio.charset.Charset; 50: import java.nio.charset.CharsetDecoder; 51: import java.nio.charset.CharsetEncoder; 52: import java.nio.charset.CodingErrorAction; 53: import java.nio.charset.IllegalCharsetNameException; 54: import java.nio.charset.UnsupportedCharsetException; 55: import java.text.Collator; 56: import java.util.Comparator; 57: import java.util.Formatter; 58: import java.util.Locale; 59: import java.util.regex.Matcher; 60: import java.util.regex.Pattern; 61: import java.util.regex.PatternSyntaxException; 62: 63: /** 64: * Strings represent an immutable set of characters. All String literals 65: * are instances of this class, and two string literals with the same contents 66: * refer to the same String object. 67: * 68: * <p>This class also includes a number of methods for manipulating the 69: * contents of strings (of course, creating a new object if there are any 70: * changes, as String is immutable). Case mapping relies on Unicode 3.0.0 71: * standards, where some character sequences have a different number of 72: * characters in the uppercase version than the lower case. 73: * 74: * <p>Strings are special, in that they are the only object with an overloaded 75: * operator. When you use '+' with at least one String argument, both 76: * arguments have String conversion performed on them, and another String (not 77: * guaranteed to be unique) results. 78: * 79: * <p>String is special-cased when doing data serialization - rather than 80: * listing the fields of this class, a String object is converted to a string 81: * literal in the object stream. 82: * 83: * @author Paul N. Fisher 84: * @author Eric Blake (ebb9@email.byu.edu) 85: * @author Per Bothner (bothner@cygnus.com) 86: * @author Tom Tromey (tromey@redhat.com) 87: * @author Andrew John Hughes (gnu_andrew@member.fsf.org) 88: * @since 1.0 89: * @status updated to 1.4; but could use better data sharing via offset field 90: */ 91: public final class String 92: implements Serializable, Comparable<String>, CharSequence 93: { 94: // WARNING: String is a CORE class in the bootstrap cycle. See the comments 95: // in vm/reference/java/lang/Runtime for implications of this fact. 96: 97: /** 98: * This is probably not necessary because this class is special cased already 99: * but it will avoid showing up as a discrepancy when comparing SUIDs. 100: */ 101: private static final long serialVersionUID = -6849794470754667710L; 102: 103: /** 104: * Stores unicode multi-character uppercase expansion table. 105: * @see #toUpperCase(Locale) 106: * @see CharData#UPPER_EXPAND 107: */ 108: private static final char[] upperExpand 109: = zeroBasedStringValue(CharData.UPPER_EXPAND); 110: 111: /** 112: * Stores unicode multi-character uppercase special casing table. 113: * @see #upperCaseExpansion(char) 114: * @see CharData#UPPER_SPECIAL 115: */ 116: private static final char[] upperSpecial 117: = zeroBasedStringValue(CharData.UPPER_SPECIAL); 118: 119: /** 120: * Characters which make up the String. 121: * Package access is granted for use by StringBuffer. 122: */ 123: final char[] value; 124: 125: /** 126: * Holds the number of characters in value. This number is generally 127: * the same as value.length, but can be smaller because substrings and 128: * StringBuffers can share arrays. Package visible for use by trusted code. 129: */ 130: final int count; 131: 132: /** 133: * Caches the result of hashCode(). If this value is zero, the hashcode 134: * is considered uncached (even if 0 is the correct hash value). 135: */ 136: private int cachedHashCode; 137: 138: /** 139: * Holds the starting position for characters in value[]. Since 140: * substring()'s are common, the use of offset allows the operation 141: * to perform in O(1). Package access is granted for use by StringBuffer. 142: */ 143: final int offset; 144: 145: /** 146: * An implementation for {@link #CASE_INSENSITIVE_ORDER}. 147: * This must be {@link Serializable}. The class name is dictated by 148: * compatibility with Sun's JDK. 149: */ 150: private static final class CaseInsensitiveComparator 151: implements Comparator<String>, Serializable 152: { 153: /** 154: * Compatible with JDK 1.2. 155: */ 156: private static final long serialVersionUID = 8575799808933029326L; 157: 158: /** 159: * The default private constructor generates unnecessary overhead. 160: */ 161: CaseInsensitiveComparator() {} 162: 163: /** 164: * Compares to Strings, using 165: * <code>String.compareToIgnoreCase(String)</code>. 166: * 167: * @param o1 the first string 168: * @param o2 the second string 169: * @return < 0, 0, or > 0 depending on the case-insensitive 170: * comparison of the two strings. 171: * @throws NullPointerException if either argument is null 172: * @throws ClassCastException if either argument is not a String 173: * @see #compareToIgnoreCase(String) 174: */ 175: public int compare(String o1, String o2) 176: { 177: return o1.compareToIgnoreCase(o2); 178: } 179: } // class CaseInsensitiveComparator 180: 181: /** 182: * A Comparator that uses <code>String.compareToIgnoreCase(String)</code>. 183: * This comparator is {@link Serializable}. Note that it ignores Locale, 184: * for that, you want a Collator. 185: * 186: * @see Collator#compare(String, String) 187: * @since 1.2 188: */ 189: public static final Comparator<String> CASE_INSENSITIVE_ORDER 190: = new CaseInsensitiveComparator(); 191: 192: /** 193: * Creates an empty String (length 0). Unless you really need a new object, 194: * consider using <code>""</code> instead. 195: */ 196: public String() 197: { 198: value = "".value; 199: offset = 0; 200: count = 0; 201: } 202: 203: /** 204: * Copies the contents of a String to a new String. Since Strings are 205: * immutable, only a shallow copy is performed. 206: * 207: * @param str String to copy 208: * @throws NullPointerException if value is null 209: */ 210: public String(String str) 211: { 212: value = str.value; 213: offset = str.offset; 214: count = str.count; 215: cachedHashCode = str.cachedHashCode; 216: } 217: 218: /** 219: * Creates a new String using the character sequence of the char array. 220: * Subsequent changes to data do not affect the String. 221: * 222: * @param data char array to copy 223: * @throws NullPointerException if data is null 224: */ 225: public String(char[] data) 226: { 227: this(data, 0, data.length, false); 228: } 229: 230: /** 231: * Creates a new String using the character sequence of a subarray of 232: * characters. The string starts at offset, and copies count chars. 233: * Subsequent changes to data do not affect the String. 234: * 235: * @param data char array to copy 236: * @param offset position (base 0) to start copying out of data 237: * @param count the number of characters from data to copy 238: * @throws NullPointerException if data is null 239: * @throws IndexOutOfBoundsException if (offset < 0 || count < 0 240: * || offset + count < 0 (overflow) 241: * || offset + count > data.length) 242: * (while unspecified, this is a StringIndexOutOfBoundsException) 243: */ 244: public String(char[] data, int offset, int count) 245: { 246: this(data, offset, count, false); 247: } 248: 249: /** 250: * Creates a new String using an 8-bit array of integer values, starting at 251: * an offset, and copying up to the count. Each character c, using 252: * corresponding byte b, is created in the new String as if by performing: 253: * 254: * <pre> 255: * c = (char) (((hibyte & 0xff) << 8) | (b & 0xff)) 256: * </pre> 257: * 258: * @param ascii array of integer values 259: * @param hibyte top byte of each Unicode character 260: * @param offset position (base 0) to start copying out of ascii 261: * @param count the number of characters from ascii to copy 262: * @throws NullPointerException if ascii is null 263: * @throws IndexOutOfBoundsException if (offset < 0 || count < 0 264: * || offset + count < 0 (overflow) 265: * || offset + count > ascii.length) 266: * (while unspecified, this is a StringIndexOutOfBoundsException) 267: * @see #String(byte[]) 268: * @see #String(byte[], String) 269: * @see #String(byte[], int, int) 270: * @see #String(byte[], int, int, String) 271: * @deprecated use {@link #String(byte[], int, int, String)} to perform 272: * correct encoding 273: */ 274: public String(byte[] ascii, int hibyte, int offset, int count) 275: { 276: if (offset < 0) 277: throw new StringIndexOutOfBoundsException("offset: " + offset); 278: if (count < 0) 279: throw new StringIndexOutOfBoundsException("count: " + count); 280: // equivalent to: offset + count < 0 || offset + count > ascii.length 281: if (ascii.length - offset < count) 282: throw new StringIndexOutOfBoundsException("offset + count: " 283: + (offset + count)); 284: value = new char[count]; 285: this.offset = 0; 286: this.count = count; 287: hibyte <<= 8; 288: offset += count; 289: while (--count >= 0) 290: value[count] = (char) (hibyte | (ascii[--offset] & 0xff)); 291: } 292: 293: /** 294: * Creates a new String using an 8-bit array of integer values. Each 295: * character c, using corresponding byte b, is created in the new String 296: * as if by performing: 297: * 298: * <pre> 299: * c = (char) (((hibyte & 0xff) << 8) | (b & 0xff)) 300: * </pre> 301: * 302: * @param ascii array of integer values 303: * @param hibyte top byte of each Unicode character 304: * @throws NullPointerException if ascii is null 305: * @see #String(byte[]) 306: * @see #String(byte[], String) 307: * @see #String(byte[], int, int) 308: * @see #String(byte[], int, int, String) 309: * @see #String(byte[], int, int, int) 310: * @deprecated use {@link #String(byte[], String)} to perform 311: * correct encoding 312: */ 313: public String(byte[] ascii, int hibyte) 314: { 315: this(ascii, hibyte, 0, ascii.length); 316: } 317: 318: /** 319: * Creates a new String using the portion of the byte array starting at the 320: * offset and ending at offset + count. Uses the specified encoding type 321: * to decode the byte array, so the resulting string may be longer or 322: * shorter than the byte array. For more decoding control, use 323: * {@link java.nio.charset.CharsetDecoder}, and for valid character sets, 324: * see {@link java.nio.charset.Charset}. The behavior is not specified if 325: * the decoder encounters invalid characters; this implementation throws 326: * an Error. 327: * 328: * @param data byte array to copy 329: * @param offset the offset to start at 330: * @param count the number of bytes in the array to use 331: * @param encoding the name of the encoding to use 332: * @throws NullPointerException if data or encoding is null 333: * @throws IndexOutOfBoundsException if offset or count is incorrect 334: * (while unspecified, this is a StringIndexOutOfBoundsException) 335: * @throws UnsupportedEncodingException if encoding is not found 336: * @throws Error if the decoding fails 337: * @since 1.1 338: */ 339: public String(byte[] data, int offset, int count, String encoding) 340: throws UnsupportedEncodingException 341: { 342: if (offset < 0) 343: throw new StringIndexOutOfBoundsException("offset: " + offset); 344: if (count < 0) 345: throw new StringIndexOutOfBoundsException("count: " + count); 346: // equivalent to: offset + count < 0 || offset + count > data.length 347: if (data.length - offset < count) 348: throw new StringIndexOutOfBoundsException("offset + count: " 349: + (offset + count)); 350: try 351: { 352: CharsetDecoder csd = Charset.forName(encoding).newDecoder(); 353: csd.onMalformedInput(CodingErrorAction.REPLACE); 354: csd.onUnmappableCharacter(CodingErrorAction.REPLACE); 355: CharBuffer cbuf = csd.decode(ByteBuffer.wrap(data, offset, count)); 356: if(cbuf.hasArray()) 357: { 358: value = cbuf.array(); 359: this.offset = cbuf.position(); 360: this.count = cbuf.remaining(); 361: } else { 362: // Doubt this will happen. But just in case. 363: value = new char[cbuf.remaining()]; 364: cbuf.get(value); 365: this.offset = 0; 366: this.count = value.length; 367: } 368: } catch(CharacterCodingException e){ 369: throw new UnsupportedEncodingException("Encoding: "+encoding+ 370: " not found."); 371: } catch(IllegalCharsetNameException e){ 372: throw new UnsupportedEncodingException("Encoding: "+encoding+ 373: " not found."); 374: } catch(UnsupportedCharsetException e){ 375: throw new UnsupportedEncodingException("Encoding: "+encoding+ 376: " not found."); 377: } 378: } 379: 380: /** 381: * Creates a new String using the byte array. Uses the specified encoding 382: * type to decode the byte array, so the resulting string may be longer or 383: * shorter than the byte array. For more decoding control, use 384: * {@link java.nio.charset.CharsetDecoder}, and for valid character sets, 385: * see {@link java.nio.charset.Charset}. The behavior is not specified if 386: * the decoder encounters invalid characters; this implementation throws 387: * an Error. 388: * 389: * @param data byte array to copy 390: * @param encoding the name of the encoding to use 391: * @throws NullPointerException if data or encoding is null 392: * @throws UnsupportedEncodingException if encoding is not found 393: * @throws Error if the decoding fails 394: * @see #String(byte[], int, int, String) 395: * @since 1.1 396: */ 397: public String(byte[] data, String encoding) 398: throws UnsupportedEncodingException 399: { 400: this(data, 0, data.length, encoding); 401: } 402: 403: /** 404: * Creates a new String using the portion of the byte array starting at the 405: * offset and ending at offset + count. Uses the encoding of the platform's 406: * default charset, so the resulting string may be longer or shorter than 407: * the byte array. For more decoding control, use 408: * {@link java.nio.charset.CharsetDecoder}. The behavior is not specified 409: * if the decoder encounters invalid characters; this implementation throws 410: * an Error. 411: * 412: * @param data byte array to copy 413: * @param offset the offset to start at 414: * @param count the number of bytes in the array to use 415: * @throws NullPointerException if data is null 416: * @throws IndexOutOfBoundsException if offset or count is incorrect 417: * @throws Error if the decoding fails 418: * @see #String(byte[], int, int, String) 419: * @since 1.1 420: */ 421: public String(byte[] data, int offset, int count) 422: { 423: if (offset < 0) 424: throw new StringIndexOutOfBoundsException("offset: " + offset); 425: if (count < 0) 426: throw new StringIndexOutOfBoundsException("count: " + count); 427: // equivalent to: offset + count < 0 || offset + count > data.length 428: if (data.length - offset < count) 429: throw new StringIndexOutOfBoundsException("offset + count: " 430: + (offset + count)); 431: int o, c; 432: char[] v; 433: String encoding; 434: try 435: { 436: encoding = System.getProperty("file.encoding"); 437: CharsetDecoder csd = Charset.forName(encoding).newDecoder(); 438: csd.onMalformedInput(CodingErrorAction.REPLACE); 439: csd.onUnmappableCharacter(CodingErrorAction.REPLACE); 440: CharBuffer cbuf = csd.decode(ByteBuffer.wrap(data, offset, count)); 441: if(cbuf.hasArray()) 442: { 443: v = cbuf.array(); 444: o = cbuf.position(); 445: c = cbuf.remaining(); 446: } else { 447: // Doubt this will happen. But just in case. 448: v = new char[cbuf.remaining()]; 449: cbuf.get(v); 450: o = 0; 451: c = v.length; 452: } 453: } catch(Exception ex){ 454: // If anything goes wrong (System property not set, 455: // NIO provider not available, etc) 456: // Default to the 'safe' encoding ISO8859_1 457: v = new char[count]; 458: o = 0; 459: c = count; 460: for (int i=0;i<count;i++) 461: v[i] = (char)data[offset+i]; 462: } 463: this.value = v; 464: this.offset = o; 465: this.count = c; 466: } 467: 468: /** 469: * Creates a new String using the byte array. Uses the encoding of the 470: * platform's default charset, so the resulting string may be longer or 471: * shorter than the byte array. For more decoding control, use 472: * {@link java.nio.charset.CharsetDecoder}. The behavior is not specified 473: * if the decoder encounters invalid characters; this implementation throws 474: * an Error. 475: * 476: * @param data byte array to copy 477: * @throws NullPointerException if data is null 478: * @throws Error if the decoding fails 479: * @see #String(byte[], int, int) 480: * @see #String(byte[], int, int, String) 481: * @since 1.1 482: */ 483: public String(byte[] data) 484: { 485: this(data, 0, data.length); 486: } 487: 488: /** 489: * Creates a new String using the character sequence represented by 490: * the StringBuffer. Subsequent changes to buf do not affect the String. 491: * 492: * @param buffer StringBuffer to copy 493: * @throws NullPointerException if buffer is null 494: */ 495: public String(StringBuffer buffer) 496: { 497: synchronized (buffer) 498: { 499: offset = 0; 500: count = buffer.count; 501: // Share unless buffer is 3/4 empty. 502: if ((count << 2) < buffer.value.length) 503: { 504: value = new char[count]; 505: VMSystem.arraycopy(buffer.value, 0, value, 0, count); 506: } 507: else 508: { 509: buffer.shared = true; 510: value = buffer.value; 511: } 512: } 513: } 514: 515: /** 516: * Creates a new String using the character sequence represented by 517: * the StringBuilder. Subsequent changes to buf do not affect the String. 518: * 519: * @param buffer StringBuilder to copy 520: * @throws NullPointerException if buffer is null 521: */ 522: public String(StringBuilder buffer) 523: { 524: this(buffer.value, 0, buffer.count); 525: } 526: 527: /** 528: * Special constructor which can share an array when safe to do so. 529: * 530: * @param data the characters to copy 531: * @param offset the location to start from 532: * @param count the number of characters to use 533: * @param dont_copy true if the array is trusted, and need not be copied 534: * @throws NullPointerException if chars is null 535: * @throws StringIndexOutOfBoundsException if bounds check fails 536: */ 537: String(char[] data, int offset, int count, boolean dont_copy) 538: { 539: if (offset < 0) 540: throw new StringIndexOutOfBoundsException("offset: " + offset); 541: if (count < 0) 542: throw new StringIndexOutOfBoundsException("count: " + count); 543: // equivalent to: offset + count < 0 || offset + count > data.length 544: if (data.length - offset < count) 545: throw new StringIndexOutOfBoundsException("offset + count: " 546: + (offset + count)); 547: if (dont_copy) 548: { 549: value = data; 550: this.offset = offset; 551: } 552: else 553: { 554: value = new char[count]; 555: VMSystem.arraycopy(data, offset, value, 0, count); 556: this.offset = 0; 557: } 558: this.count = count; 559: } 560: 561: /** 562: * Creates a new String containing the characters represented in the 563: * given subarray of Unicode code points. 564: * @param codePoints the entire array of code points 565: * @param offset the start of the subarray 566: * @param count the length of the subarray 567: * 568: * @throws IllegalArgumentException if an invalid code point is found 569: * in the codePoints array 570: * @throws IndexOutOfBoundsException if offset is negative or offset + count 571: * is greater than the length of the array. 572: */ 573: public String(int[] codePoints, int offset, int count) 574: { 575: // FIXME: This implementation appears to give correct internal 576: // representation of the String because: 577: // - length() is correct 578: // - getting a char[] from toCharArray() and testing 579: // Character.codePointAt() on all the characters in that array gives 580: // the appropriate results 581: // however printing the String gives incorrect results. This may be 582: // due to printing method errors (such as incorrectly looping through 583: // the String one char at a time rather than one "character" at a time. 584: 585: if (offset < 0) 586: throw new IndexOutOfBoundsException(); 587: int end = offset + count; 588: int pos = 0; 589: // This creates a char array that is long enough for all of the code 590: // points to represent supplementary characters. This is more than likely 591: // a waste of storage, so we use it only temporarily and then copy the 592: // used portion into the value array. 593: char[] temp = new char[2 * codePoints.length]; 594: for (int i = offset; i < end; i++) 595: { 596: pos += Character.toChars(codePoints[i], temp, pos); 597: } 598: this.count = pos; 599: this.value = new char[pos]; 600: System.arraycopy(temp, 0, value, 0, pos); 601: this.offset = 0; 602: } 603: 604: /** 605: * Returns the number of characters contained in this String. 606: * 607: * @return the length of this String 608: */ 609: public int length() 610: { 611: return count; 612: } 613: 614: /** 615: * Returns the character located at the specified index within this String. 616: * 617: * @param index position of character to return (base 0) 618: * @return character located at position index 619: * @throws IndexOutOfBoundsException if index < 0 || index >= length() 620: * (while unspecified, this is a StringIndexOutOfBoundsException) 621: */ 622: public char charAt(int index) 623: { 624: if (index < 0 || index >= count) 625: throw new StringIndexOutOfBoundsException(index); 626: return value[offset + index]; 627: } 628: 629: /** 630: * Get the code point at the specified index. This is like #charAt(int), 631: * but if the character is the start of a surrogate pair, and the 632: * following character completes the pair, then the corresponding 633: * supplementary code point is returned. 634: * @param index the index of the codepoint to get, starting at 0 635: * @return the codepoint at the specified index 636: * @throws IndexOutOfBoundsException if index is negative or >= length() 637: * @since 1.5 638: */ 639: public synchronized int codePointAt(int index) 640: { 641: // Use the CharSequence overload as we get better range checking 642: // this way. 643: return Character.codePointAt(this, index); 644: } 645: 646: /** 647: * Get the code point before the specified index. This is like 648: * #codePointAt(int), but checks the characters at <code>index-1</code> and 649: * <code>index-2</code> to see if they form a supplementary code point. 650: * @param index the index just past the codepoint to get, starting at 0 651: * @return the codepoint at the specified index 652: * @throws IndexOutOfBoundsException if index is negative or >= length() 653: * (while unspecified, this is a StringIndexOutOfBoundsException) 654: * @since 1.5 655: */ 656: public synchronized int codePointBefore(int index) 657: { 658: // Use the CharSequence overload as we get better range checking 659: // this way. 660: return Character.codePointBefore(this, index); 661: } 662: 663: /** 664: * Copies characters from this String starting at a specified start index, 665: * ending at a specified stop index, to a character array starting at 666: * a specified destination begin index. 667: * 668: * @param srcBegin index to begin copying characters from this String 669: * @param srcEnd index after the last character to be copied from this String 670: * @param dst character array which this String is copied into 671: * @param dstBegin index to start writing characters into dst 672: * @throws NullPointerException if dst is null 673: * @throws IndexOutOfBoundsException if any indices are out of bounds 674: * (while unspecified, source problems cause a 675: * StringIndexOutOfBoundsException, and dst problems cause an 676: * ArrayIndexOutOfBoundsException) 677: */ 678: public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) 679: { 680: if (srcBegin < 0 || srcBegin > srcEnd || srcEnd > count) 681: throw new StringIndexOutOfBoundsException(); 682: VMSystem.arraycopy(value, srcBegin + offset, 683: dst, dstBegin, srcEnd - srcBegin); 684: } 685: 686: /** 687: * Copies the low byte of each character from this String starting at a 688: * specified start index, ending at a specified stop index, to a byte array 689: * starting at a specified destination begin index. 690: * 691: * @param srcBegin index to being copying characters from this String 692: * @param srcEnd index after the last character to be copied from this String 693: * @param dst byte array which each low byte of this String is copied into 694: * @param dstBegin index to start writing characters into dst 695: * @throws NullPointerException if dst is null and copy length is non-zero 696: * @throws IndexOutOfBoundsException if any indices are out of bounds 697: * (while unspecified, source problems cause a 698: * StringIndexOutOfBoundsException, and dst problems cause an 699: * ArrayIndexOutOfBoundsException) 700: * @see #getBytes() 701: * @see #getBytes(String) 702: * @deprecated use {@link #getBytes()}, which uses a char to byte encoder 703: */ 704: public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) 705: { 706: if (srcBegin < 0 || srcBegin > srcEnd || srcEnd > count) 707: throw new StringIndexOutOfBoundsException(); 708: int i = srcEnd - srcBegin; 709: srcBegin += offset; 710: while (--i >= 0) 711: dst[dstBegin++] = (byte) value[srcBegin++]; 712: } 713: 714: /** 715: * Converts the Unicode characters in this String to a byte array. Uses the 716: * specified encoding method, so the result may be longer or shorter than 717: * the String. For more encoding control, use 718: * {@link java.nio.charset.CharsetEncoder}, and for valid character sets, 719: * see {@link java.nio.charset.Charset}. Unsupported characters get 720: * replaced by an encoding specific byte. 721: * 722: * @param enc encoding name 723: * @return the resulting byte array 724: * @throws NullPointerException if enc is null 725: * @throws UnsupportedEncodingException if encoding is not supported 726: * @since 1.1 727: */ 728: public byte[] getBytes(String enc) throws UnsupportedEncodingException 729: { 730: try 731: { 732: CharsetEncoder cse = Charset.forName(enc).newEncoder(); 733: cse.onMalformedInput(CodingErrorAction.REPLACE); 734: cse.onUnmappableCharacter(CodingErrorAction.REPLACE); 735: ByteBuffer bbuf = cse.encode(CharBuffer.wrap(value, offset, count)); 736: if(bbuf.hasArray()) 737: return bbuf.array(); 738: 739: // Doubt this will happen. But just in case. 740: byte[] bytes = new byte[bbuf.remaining()]; 741: bbuf.get(bytes); 742: return bytes; 743: } 744: catch(IllegalCharsetNameException e) 745: { 746: throw new UnsupportedEncodingException("Encoding: " + enc 747: + " not found."); 748: } 749: catch(UnsupportedCharsetException e) 750: { 751: throw new UnsupportedEncodingException("Encoding: " + enc 752: + " not found."); 753: } 754: catch(CharacterCodingException e) 755: { 756: // This shouldn't ever happen. 757: throw (InternalError) new InternalError().initCause(e); 758: } 759: } 760: 761: /** 762: * Converts the Unicode characters in this String to a byte array. Uses the 763: * encoding of the platform's default charset, so the result may be longer 764: * or shorter than the String. For more encoding control, use 765: * {@link java.nio.charset.CharsetEncoder}. Unsupported characters get 766: * replaced by an encoding specific byte. 767: * 768: * @return the resulting byte array, or null on a problem 769: * @since 1.1 770: */ 771: public byte[] getBytes() 772: { 773: try 774: { 775: return getBytes(System.getProperty("file.encoding")); 776: } catch(Exception e) { 777: // XXX - Throw an error here? 778: // For now, default to the 'safe' encoding. 779: byte[] bytes = new byte[count]; 780: for(int i=0;i<count;i++) 781: bytes[i] = (byte)((value[offset+i] <= 0xFF)? 782: value[offset+i]:'?'); 783: return bytes; 784: } 785: } 786: 787: /** 788: * Predicate which compares anObject to this. This is true only for Strings 789: * with the same character sequence. 790: * 791: * @param anObject the object to compare 792: * @return true if anObject is semantically equal to this 793: * @see #compareTo(String) 794: * @see #equalsIgnoreCase(String) 795: */ 796: public boolean equals(Object anObject) 797: { 798: if (! (anObject instanceof String)) 799: return false; 800: String str2 = (String) anObject; 801: if (count != str2.count) 802: return false; 803: if (value == str2.value && offset == str2.offset) 804: return true; 805: int i = count; 806: int x = offset; 807: int y = str2.offset; 808: while (--i >= 0) 809: if (value[x++] != str2.value[y++]) 810: return false; 811: return true; 812: } 813: 814: /** 815: * Compares the given StringBuffer to this String. This is true if the 816: * StringBuffer has the same content as this String at this moment. 817: * 818: * @param buffer the StringBuffer to compare to 819: * @return true if StringBuffer has the same character sequence 820: * @throws NullPointerException if the given StringBuffer is null 821: * @since 1.4 822: */ 823: public boolean contentEquals(StringBuffer buffer) 824: { 825: synchronized (buffer) 826: { 827: if (count != buffer.count) 828: return false; 829: if (value == buffer.value) 830: return true; // Possible if shared. 831: int i = count; 832: int x = offset + count; 833: while (--i >= 0) 834: if (value[--x] != buffer.value[i]) 835: return false; 836: return true; 837: } 838: } 839: 840: /** 841: * Compares the given CharSequence to this String. This is true if 842: * the CharSequence has the same content as this String at this 843: * moment. 844: * 845: * @param seq the CharSequence to compare to 846: * @return true if CharSequence has the same character sequence 847: * @throws NullPointerException if the given CharSequence is null 848: * @since 1.5 849: */ 850: public boolean contentEquals(CharSequence seq) 851: { 852: if (seq.length() != count) 853: return false; 854: for (int i = 0; i < count; ++i) 855: if (value[offset + i] != seq.charAt(i)) 856: return false; 857: return true; 858: } 859: 860: /** 861: * Compares a String to this String, ignoring case. This does not handle 862: * multi-character capitalization exceptions; instead the comparison is 863: * made on a character-by-character basis, and is true if:<br><ul> 864: * <li><code>c1 == c2</code></li> 865: * <li><code>Character.toUpperCase(c1) 866: * == Character.toUpperCase(c2)</code></li> 867: * <li><code>Character.toLowerCase(c1) 868: * == Character.toLowerCase(c2)</code></li> 869: * </ul> 870: * 871: * @param anotherString String to compare to this String 872: * @return true if anotherString is equal, ignoring case 873: * @see #equals(Object) 874: * @see Character#toUpperCase(char) 875: * @see Character#toLowerCase(char) 876: */ 877: public boolean equalsIgnoreCase(String anotherString) 878: { 879: if (anotherString == null || count != anotherString.count) 880: return false; 881: int i = count; 882: int x = offset; 883: int y = anotherString.offset; 884: while (--i >= 0) 885: { 886: char c1 = value[x++]; 887: char c2 = anotherString.value[y++]; 888: // Note that checking c1 != c2 is redundant, but avoids method calls. 889: if (c1 != c2 890: && Character.toUpperCase(c1) != Character.toUpperCase(c2) 891: && Character.toLowerCase(c1) != Character.toLowerCase(c2)) 892: return false; 893: } 894: return true; 895: } 896: 897: /** 898: * Compares this String and another String (case sensitive, 899: * lexicographically). The result is less than 0 if this string sorts 900: * before the other, 0 if they are equal, and greater than 0 otherwise. 901: * After any common starting sequence is skipped, the result is 902: * <code>this.charAt(k) - anotherString.charAt(k)</code> if both strings 903: * have characters remaining, or 904: * <code>this.length() - anotherString.length()</code> if one string is 905: * a subsequence of the other. 906: * 907: * @param anotherString the String to compare against 908: * @return the comparison 909: * @throws NullPointerException if anotherString is null 910: */ 911: public int compareTo(String anotherString) 912: { 913: int i = Math.min(count, anotherString.count); 914: int x = offset; 915: int y = anotherString.offset; 916: while (--i >= 0) 917: { 918: int result = value[x++] - anotherString.value[y++]; 919: if (result != 0) 920: return result; 921: } 922: return count - anotherString.count; 923: } 924: 925: /** 926: * Compares this String and another String (case insensitive). This 927: * comparison is <em>similar</em> to equalsIgnoreCase, in that it ignores 928: * locale and multi-characater capitalization, and compares characters 929: * after performing 930: * <code>Character.toLowerCase(Character.toUpperCase(c))</code> on each 931: * character of the string. This is unsatisfactory for locale-based 932: * comparison, in which case you should use {@link java.text.Collator}. 933: * 934: * @param str the string to compare against 935: * @return the comparison 936: * @see Collator#compare(String, String) 937: * @since 1.2 938: */ 939: public int compareToIgnoreCase(String str) 940: { 941: int i = Math.min(count, str.count); 942: int x = offset; 943: int y = str.offset; 944: while (--i >= 0) 945: { 946: int result = Character.toLowerCase(Character.toUpperCase(value[x++])) 947: - Character.toLowerCase(Character.toUpperCase(str.value[y++])); 948: if (result != 0) 949: return result; 950: } 951: return count - str.count; 952: } 953: 954: /** 955: * Predicate which determines if this String matches another String 956: * starting at a specified offset for each String and continuing 957: * for a specified length. Indices out of bounds are harmless, and give 958: * a false result. 959: * 960: * @param toffset index to start comparison at for this String 961: * @param other String to compare region to this String 962: * @param ooffset index to start comparison at for other 963: * @param len number of characters to compare 964: * @return true if regions match (case sensitive) 965: * @throws NullPointerException if other is null 966: */ 967: public boolean regionMatches(int toffset, String other, int ooffset, int len) 968: { 969: return regionMatches(false, toffset, other, ooffset, len); 970: } 971: 972: /** 973: * Predicate which determines if this String matches another String 974: * starting at a specified offset for each String and continuing 975: * for a specified length, optionally ignoring case. Indices out of bounds 976: * are harmless, and give a false result. Case comparisons are based on 977: * <code>Character.toLowerCase()</code> and 978: * <code>Character.toUpperCase()</code>, not on multi-character 979: * capitalization expansions. 980: * 981: * @param ignoreCase true if case should be ignored in comparision 982: * @param toffset index to start comparison at for this String 983: * @param other String to compare region to this String 984: * @param ooffset index to start comparison at for other 985: * @param len number of characters to compare 986: * @return true if regions match, false otherwise 987: * @throws NullPointerException if other is null 988: */ 989: public boolean regionMatches(boolean ignoreCase, int toffset, 990: String other, int ooffset, int len) 991: { 992: if (toffset < 0 || ooffset < 0 || toffset + len > count 993: || ooffset + len > other.count) 994: return false; 995: toffset += offset; 996: ooffset += other.offset; 997: while (--len >= 0) 998: { 999: char c1 = value[toffset++]; 1000: char c2 = other.value[ooffset++]; 1001: // Note that checking c1 != c2 is redundant when ignoreCase is true, 1002: // but it avoids method calls. 1003: if (c1 != c2 1004: && (! ignoreCase 1005: || (Character.toLowerCase(c1) != Character.toLowerCase(c2) 1006: && (Character.toUpperCase(c1) 1007: != Character.toUpperCase(c2))))) 1008: return false; 1009: } 1010: return true; 1011: } 1012: 1013: /** 1014: * Predicate which determines if this String contains the given prefix, 1015: * beginning comparison at toffset. The result is false if toffset is 1016: * negative or greater than this.length(), otherwise it is the same as 1017: * <code>this.substring(toffset).startsWith(prefix)</code>. 1018: * 1019: * @param prefix String to compare 1020: * @param toffset offset for this String where comparison starts 1021: * @return true if this String starts with prefix 1022: * @throws NullPointerException if prefix is null 1023: * @see #regionMatches(boolean, int, String, int, int) 1024: */ 1025: public boolean startsWith(String prefix, int toffset) 1026: { 1027: return regionMatches(false, toffset, prefix, 0, prefix.count); 1028: } 1029: 1030: /** 1031: * Predicate which determines if this String starts with a given prefix. 1032: * If the prefix is an empty String, true is returned. 1033: * 1034: * @param prefix String to compare 1035: * @return true if this String starts with the prefix 1036: * @throws NullPointerException if prefix is null 1037: * @see #startsWith(String, int) 1038: */ 1039: public boolean startsWith(String prefix) 1040: { 1041: return regionMatches(false, 0, prefix, 0, prefix.count); 1042: } 1043: 1044: /** 1045: * Predicate which determines if this String ends with a given suffix. 1046: * If the suffix is an empty String, true is returned. 1047: * 1048: * @param suffix String to compare 1049: * @return true if this String ends with the suffix 1050: * @throws NullPointerException if suffix is null 1051: * @see #regionMatches(boolean, int, String, int, int) 1052: */ 1053: public boolean endsWith(String suffix) 1054: { 1055: return regionMatches(false, count - suffix.count, suffix, 0, suffix.count); 1056: } 1057: 1058: /** 1059: * Computes the hashcode for this String. This is done with int arithmetic, 1060: * where ** represents exponentiation, by this formula:<br> 1061: * <code>s[0]*31**(n-1) + s[1]*31**(n-2) + ... + s[n-1]</code>. 1062: * 1063: * @return hashcode value of this String 1064: */ 1065: public int hashCode() 1066: { 1067: if (cachedHashCode != 0) 1068: return cachedHashCode; 1069: 1070: // Compute the hash code using a local variable to be reentrant. 1071: int hashCode = 0; 1072: int limit = count + offset; 1073: for (int i = offset; i < limit; i++) 1074: hashCode = hashCode * 31 + value[i]; 1075: return cachedHashCode = hashCode; 1076: } 1077: 1078: /** 1079: * Finds the first instance of a character in this String. 1080: * 1081: * @param ch character to find 1082: * @return location (base 0) of the character, or -1 if not found 1083: */ 1084: public int indexOf(int ch) 1085: { 1086: return indexOf(ch, 0); 1087: } 1088: 1089: /** 1090: * Finds the first instance of a character in this String, starting at 1091: * a given index. If starting index is less than 0, the search 1092: * starts at the beginning of this String. If the starting index 1093: * is greater than the length of this String, -1 is returned. 1094: * 1095: * @param ch character to find 1096: * @param fromIndex index to start the search 1097: * @return location (base 0) of the character, or -1 if not found 1098: */ 1099: public int indexOf(int ch, int fromIndex) 1100: { 1101: if ((char) ch != ch) 1102: return -1; 1103: if (fromIndex < 0) 1104: fromIndex = 0; 1105: int i = fromIndex + offset; 1106: for ( ; fromIndex < count; fromIndex++) 1107: if (value[i++] == ch) 1108: return fromIndex; 1109: return -1; 1110: } 1111: 1112: /** 1113: * Finds the last instance of a character in this String. 1114: * 1115: * @param ch character to find 1116: * @return location (base 0) of the character, or -1 if not found 1117: */ 1118: public int lastIndexOf(int ch) 1119: { 1120: return lastIndexOf(ch, count - 1); 1121: } 1122: 1123: /** 1124: * Finds the last instance of a character in this String, starting at 1125: * a given index. If starting index is greater than the maximum valid 1126: * index, then the search begins at the end of this String. If the 1127: * starting index is less than zero, -1 is returned. 1128: * 1129: * @param ch character to find 1130: * @param fromIndex index to start the search 1131: * @return location (base 0) of the character, or -1 if not found 1132: */ 1133: public int lastIndexOf(int ch, int fromIndex) 1134: { 1135: if ((char) ch != ch) 1136: return -1; 1137: if (fromIndex >= count) 1138: fromIndex = count - 1; 1139: int i = fromIndex + offset; 1140: for ( ; fromIndex >= 0; fromIndex--) 1141: if (value[i--] == ch) 1142: return fromIndex; 1143: return -1; 1144: } 1145: 1146: /** 1147: * Finds the first instance of a String in this String. 1148: * 1149: * @param str String to find 1150: * @return location (base 0) of the String, or -1 if not found 1151: * @throws NullPointerException if str is null 1152: */ 1153: public int indexOf(String str) 1154: { 1155: return indexOf(str, 0); 1156: } 1157: 1158: /** 1159: * Finds the first instance of a String in this String, starting at 1160: * a given index. If starting index is less than 0, the search 1161: * starts at the beginning of this String. If the starting index 1162: * is greater than the length of this String, -1 is returned. 1163: * 1164: * @param str String to find 1165: * @param fromIndex index to start the search 1166: * @return location (base 0) of the String, or -1 if not found 1167: * @throws NullPointerException if str is null 1168: */ 1169: public int indexOf(String str, int fromIndex) 1170: { 1171: if (fromIndex < 0) 1172: fromIndex = 0; 1173: int limit = count - str.count; 1174: for ( ; fromIndex <= limit; fromIndex++) 1175: if (regionMatches(fromIndex, str, 0, str.count)) 1176: return fromIndex; 1177: return -1; 1178: } 1179: 1180: /** 1181: * Finds the last instance of a String in this String. 1182: * 1183: * @param str String to find 1184: * @return location (base 0) of the String, or -1 if not found 1185: * @throws NullPointerException if str is null 1186: */ 1187: public int lastIndexOf(String str) 1188: { 1189: return lastIndexOf(str, count - str.count); 1190: } 1191: 1192: /** 1193: * Finds the last instance of a String in this String, starting at 1194: * a given index. If starting index is greater than the maximum valid 1195: * index, then the search begins at the end of this String. If the 1196: * starting index is less than zero, -1 is returned. 1197: * 1198: * @param str String to find 1199: * @param fromIndex index to start the search 1200: * @return location (base 0) of the String, or -1 if not found 1201: * @throws NullPointerException if str is null 1202: */ 1203: public int lastIndexOf(String str, int fromIndex) 1204: { 1205: fromIndex = Math.min(fromIndex, count - str.count); 1206: for ( ; fromIndex >= 0; fromIndex--) 1207: if (regionMatches(fromIndex, str, 0, str.count)) 1208: return fromIndex; 1209: return -1; 1210: } 1211: 1212: /** 1213: * Creates a substring of this String, starting at a specified index 1214: * and ending at the end of this String. 1215: * 1216: * @param begin index to start substring (base 0) 1217: * @return new String which is a substring of this String 1218: * @throws IndexOutOfBoundsException if begin < 0 || begin > length() 1219: * (while unspecified, this is a StringIndexOutOfBoundsException) 1220: */ 1221: public String substring(int begin) 1222: { 1223: return substring(begin, count); 1224: } 1225: 1226: /** 1227: * Creates a substring of this String, starting at a specified index 1228: * and ending at one character before a specified index. 1229: * 1230: * @param beginIndex index to start substring (inclusive, base 0) 1231: * @param endIndex index to end at (exclusive) 1232: * @return new String which is a substring of this String 1233: * @throws IndexOutOfBoundsException if begin < 0 || end > length() 1234: * || begin > end (while unspecified, this is a 1235: * StringIndexOutOfBoundsException) 1236: */ 1237: public String substring(int beginIndex, int endIndex) 1238: { 1239: if (beginIndex < 0 || endIndex > count || beginIndex > endIndex) 1240: throw new StringIndexOutOfBoundsException(); 1241: if (beginIndex == 0 && endIndex == count) 1242: return this; 1243: int len = endIndex - beginIndex; 1244: // Package constructor avoids an array copy. 1245: return new String(value, beginIndex + offset, len, 1246: (len << 2) >= value.length); 1247: } 1248: 1249: /** 1250: * Creates a substring of this String, starting at a specified index 1251: * and ending at one character before a specified index. This behaves like 1252: * <code>substring(begin, end)</code>. 1253: * 1254: * @param begin index to start substring (inclusive, base 0) 1255: * @param end index to end at (exclusive) 1256: * @return new String which is a substring of this String 1257: * @throws IndexOutOfBoundsException if begin < 0 || end > length() 1258: * || begin > end 1259: * @since 1.4 1260: */ 1261: public CharSequence subSequence(int begin, int end) 1262: { 1263: return substring(begin, end); 1264: } 1265: 1266: /** 1267: * Concatenates a String to this String. This results in a new string unless 1268: * one of the two originals is "". 1269: * 1270: * @param str String to append to this String 1271: * @return newly concatenated String 1272: * @throws NullPointerException if str is null 1273: */ 1274: public String concat(String str) 1275: { 1276: if (str.count == 0) 1277: return this; 1278: if (count == 0) 1279: return str; 1280: char[] newStr = new char[count + str.count]; 1281: VMSystem.arraycopy(value, offset, newStr, 0, count); 1282: VMSystem.arraycopy(str.value, str.offset, newStr, count, str.count); 1283: // Package constructor avoids an array copy. 1284: return new String(newStr, 0, newStr.length, true); 1285: } 1286: 1287: /** 1288: * Replaces every instance of a character in this String with a new 1289: * character. If no replacements occur, this is returned. 1290: * 1291: * @param oldChar the old character to replace 1292: * @param newChar the new character 1293: * @return new String with all instances of oldChar replaced with newChar 1294: */ 1295: public String replace(char oldChar, char newChar) 1296: { 1297: if (oldChar == newChar) 1298: return this; 1299: int i = count; 1300: int x = offset - 1; 1301: while (--i >= 0) 1302: if (value[++x] == oldChar) 1303: break; 1304: if (i < 0) 1305: return this; 1306: char[] newStr = (char[]) value.clone(); 1307: newStr[x] = newChar; 1308: while (--i >= 0) 1309: if (value[++x] == oldChar) 1310: newStr[x] = newChar; 1311: // Package constructor avoids an array copy. 1312: return new String(newStr, offset, count, true); 1313: } 1314: 1315: /** 1316: * Test if this String matches a regular expression. This is shorthand for 1317: * <code>{@link Pattern}.matches(regex, this)</code>. 1318: * 1319: * @param regex the pattern to match 1320: * @return true if the pattern matches 1321: * @throws NullPointerException if regex is null 1322: * @throws PatternSyntaxException if regex is invalid 1323: * @see Pattern#matches(String, CharSequence) 1324: * @since 1.4 1325: */ 1326: public boolean matches(String regex) 1327: { 1328: return Pattern.matches(regex, this); 1329: } 1330: 1331: /** 1332: * Replaces the first substring match of the regular expression with a 1333: * given replacement. This is shorthand for <code>{@link Pattern} 1334: * .compile(regex).matcher(this).replaceFirst(replacement)</code>. 1335: * 1336: * @param regex the pattern to match 1337: * @param replacement the replacement string 1338: * @return the modified string 1339: * @throws NullPointerException if regex or replacement is null 1340: * @throws PatternSyntaxException if regex is invalid 1341: * @see #replaceAll(String, String) 1342: * @see Pattern#compile(String) 1343: * @see Pattern#matcher(CharSequence) 1344: * @see Matcher#replaceFirst(String) 1345: * @since 1.4 1346: */ 1347: public String replaceFirst(String regex, String replacement) 1348: { 1349: return Pattern.compile(regex).matcher(this).replaceFirst(replacement); 1350: } 1351: 1352: /** 1353: * Replaces all matching substrings of the regular expression with a 1354: * given replacement. This is shorthand for <code>{@link Pattern} 1355: * .compile(regex).matcher(this).replaceAll(replacement)</code>. 1356: * 1357: * @param regex the pattern to match 1358: * @param replacement the replacement string 1359: * @return the modified string 1360: * @throws NullPointerException if regex or replacement is null 1361: * @throws PatternSyntaxException if regex is invalid 1362: * @see #replaceFirst(String, String) 1363: * @see Pattern#compile(String) 1364: * @see Pattern#matcher(CharSequence) 1365: * @see Matcher#replaceAll(String) 1366: * @since 1.4 1367: */ 1368: public String replaceAll(String regex, String replacement) 1369: { 1370: return Pattern.compile(regex).matcher(this).replaceAll(replacement); 1371: } 1372: 1373: /** 1374: * Split this string around the matches of a regular expression. Each 1375: * element of the returned array is the largest block of characters not 1376: * terminated by the regular expression, in the order the matches are found. 1377: * 1378: * <p>The limit affects the length of the array. If it is positive, the 1379: * array will contain at most n elements (n - 1 pattern matches). If 1380: * negative, the array length is unlimited, but there can be trailing empty 1381: * entries. if 0, the array length is unlimited, and trailing empty entries 1382: * are discarded. 1383: * 1384: * <p>For example, splitting "boo:and:foo" yields:<br> 1385: * <table border=0> 1386: * <th><td>Regex</td> <td>Limit</td> <td>Result</td></th> 1387: * <tr><td>":"</td> <td>2</td> <td>{ "boo", "and:foo" }</td></tr> 1388: * <tr><td>":"</td> <td>t</td> <td>{ "boo", "and", "foo" }</td></tr> 1389: * <tr><td>":"</td> <td>-2</td> <td>{ "boo", "and", "foo" }</td></tr> 1390: * <tr><td>"o"</td> <td>5</td> <td>{ "b", "", ":and:f", "", "" }</td></tr> 1391: * <tr><td>"o"</td> <td>-2</td> <td>{ "b", "", ":and:f", "", "" }</td></tr> 1392: * <tr><td>"o"</td> <td>0</td> <td>{ "b", "", ":and:f" }</td></tr> 1393: * </table> 1394: * 1395: * <p>This is shorthand for 1396: * <code>{@link Pattern}.compile(regex).split(this, limit)</code>. 1397: * 1398: * @param regex the pattern to match 1399: * @param limit the limit threshold 1400: * @return the array of split strings 1401: * @throws NullPointerException if regex or replacement is null 1402: * @throws PatternSyntaxException if regex is invalid 1403: * @see Pattern#compile(String) 1404: * @see Pattern#split(CharSequence, int) 1405: * @since 1.4 1406: */ 1407: public String[] split(String regex, int limit) 1408: { 1409: return Pattern.compile(regex).split(this, limit); 1410: } 1411: 1412: /** 1413: * Split this string around the matches of a regular expression. Each 1414: * element of the returned array is the largest block of characters not 1415: * terminated by the regular expression, in the order the matches are found. 1416: * The array length is unlimited, and trailing empty entries are discarded, 1417: * as though calling <code>split(regex, 0)</code>. 1418: * 1419: * @param regex the pattern to match 1420: * @return the array of split strings 1421: * @throws NullPointerException if regex or replacement is null 1422: * @throws PatternSyntaxException if regex is invalid 1423: * @see #split(String, int) 1424: * @see Pattern#compile(String) 1425: * @see Pattern#split(CharSequence, int) 1426: * @since 1.4 1427: */ 1428: public String[] split(String regex) 1429: { 1430: return Pattern.compile(regex).split(this, 0); 1431: } 1432: 1433: /** 1434: * Lowercases this String according to a particular locale. This uses 1435: * Unicode's special case mappings, as applied to the given Locale, so the 1436: * resulting string may be a different length. 1437: * 1438: * @param loc locale to use 1439: * @return new lowercased String, or this if no characters were lowercased 1440: * @throws NullPointerException if loc is null 1441: * @see #toUpperCase(Locale) 1442: * @since 1.1 1443: */ 1444: public String toLowerCase(Locale loc) 1445: { 1446: // First, see if the current string is already lower case. 1447: boolean turkish = "tr".equals(loc.getLanguage()); 1448: int i = count; 1449: int x = offset - 1; 1450: while (--i >= 0) 1451: { 1452: char ch = value[++x]; 1453: if ((turkish && ch == '\u0049') 1454: || ch != Character.toLowerCase(ch)) 1455: break; 1456: } 1457: if (i < 0) 1458: return this; 1459: 1460: // Now we perform the conversion. Fortunately, there are no multi-character 1461: // lowercase expansions in Unicode 3.0.0. 1462: char[] newStr = (char[]) value.clone(); 1463: do 1464: { 1465: char ch = value[x]; 1466: // Hardcoded special case. 1467: newStr[x++] = (turkish && ch == '\u0049') ? '\u0131' 1468: : Character.toLowerCase(ch); 1469: } 1470: while (--i >= 0); 1471: // Package constructor avoids an array copy. 1472: return new String(newStr, offset, count, true); 1473: } 1474: 1475: /** 1476: * Lowercases this String. This uses Unicode's special case mappings, as 1477: * applied to the platform's default Locale, so the resulting string may 1478: * be a different length. 1479: * 1480: * @return new lowercased String, or this if no characters were lowercased 1481: * @see #toLowerCase(Locale) 1482: * @see #toUpperCase() 1483: */ 1484: public String toLowerCase() 1485: { 1486: return toLowerCase(Locale.getDefault()); 1487: } 1488: 1489: /** 1490: * Uppercases this String according to a particular locale. This uses 1491: * Unicode's special case mappings, as applied to the given Locale, so the 1492: * resulting string may be a different length. 1493: * 1494: * @param loc locale to use 1495: * @return new uppercased String, or this if no characters were uppercased 1496: * @throws NullPointerException if loc is null 1497: * @see #toLowerCase(Locale) 1498: * @since 1.1 1499: */ 1500: public String toUpperCase(Locale loc) 1501: { 1502: // First, see how many characters we have to grow by, as well as if the 1503: // current string is already upper case. 1504: boolean turkish = "tr".equals(loc.getLanguage()); 1505: int expand = 0; 1506: boolean unchanged = true; 1507: int i = count; 1508: int x = i + offset; 1509: while (--i >= 0) 1510: { 1511: char ch = value[--x]; 1512: expand += upperCaseExpansion(ch); 1513: unchanged = (unchanged && expand == 0 1514: && ! (turkish && ch == '\u0069') 1515: && ch == Character.toUpperCase(ch)); 1516: } 1517: if (unchanged) 1518: return this; 1519: 1520: // Now we perform the conversion. 1521: i = count; 1522: if (expand == 0) 1523: { 1524: char[] newStr = (char[]) value.clone(); 1525: while (--i >= 0) 1526: { 1527: char ch = value[x]; 1528: // Hardcoded special case. 1529: newStr[x++] = (turkish && ch == '\u0069') ? '\u0130' 1530: : Character.toUpperCase(ch); 1531: } 1532: // Package constructor avoids an array copy. 1533: return new String(newStr, offset, count, true); 1534: } 1535: 1536: // Expansion is necessary. 1537: char[] newStr = new char[count + expand]; 1538: int j = 0; 1539: while (--i >= 0) 1540: { 1541: char ch = value[x++]; 1542: // Hardcoded special case. 1543: if (turkish && ch == '\u0069') 1544: { 1545: newStr[j++] = '\u0130'; 1546: continue; 1547: } 1548: expand = upperCaseExpansion(ch); 1549: if (expand > 0) 1550: { 1551: int index = upperCaseIndex(ch); 1552: while (expand-- >= 0) 1553: newStr[j++] = upperExpand[index++]; 1554: } 1555: else 1556: newStr[j++] = Character.toUpperCase(ch); 1557: } 1558: // Package constructor avoids an array copy. 1559: return new String(newStr, 0, newStr.length, true); 1560: } 1561: 1562: /** 1563: * Uppercases this String. This uses Unicode's special case mappings, as 1564: * applied to the platform's default Locale, so the resulting string may 1565: * be a different length. 1566: * 1567: * @return new uppercased String, or this if no characters were uppercased 1568: * @see #toUpperCase(Locale) 1569: * @see #toLowerCase() 1570: */ 1571: public String toUpperCase() 1572: { 1573: return toUpperCase(Locale.getDefault()); 1574: } 1575: 1576: /** 1577: * Trims all characters less than or equal to <code>'\u0020'</code> 1578: * (<code>' '</code>) from the beginning and end of this String. This 1579: * includes many, but not all, ASCII control characters, and all 1580: * {@link Character#isWhitespace(char)}. 1581: * 1582: * @return new trimmed String, or this if nothing trimmed 1583: */ 1584: public String trim() 1585: { 1586: int limit = count + offset; 1587: if (count == 0 || (value[offset] > '\u0020' 1588: && value[limit - 1] > '\u0020')) 1589: return this; 1590: int begin = offset; 1591: do 1592: if (begin == limit) 1593: return ""; 1594: while (value[begin++] <= '\u0020'); 1595: 1596: int end = limit; 1597: while (value[--end] <= '\u0020') 1598: ; 1599: return substring(begin - offset - 1, end - offset + 1); 1600: } 1601: 1602: /** 1603: * Returns this, as it is already a String! 1604: * 1605: * @return this 1606: */ 1607: public String toString() 1608: { 1609: return this; 1610: } 1611: 1612: /** 1613: * Copies the contents of this String into a character array. Subsequent 1614: * changes to the array do not affect the String. 1615: * 1616: * @return character array copying the String 1617: */ 1618: public char[] toCharArray() 1619: { 1620: if (count == value.length) 1621: return (char[]) value.clone(); 1622: 1623: char[] copy = new char[count]; 1624: VMSystem.arraycopy(value, offset, copy, 0, count); 1625: return copy; 1626: } 1627: 1628: /** 1629: * Returns a String representation of an Object. This is "null" if the 1630: * object is null, otherwise it is <code>obj.toString()</code> (which 1631: * can be null). 1632: * 1633: * @param obj the Object 1634: * @return the string conversion of obj 1635: */ 1636: public static String valueOf(Object obj) 1637: { 1638: return obj == null ? "null" : obj.toString(); 1639: } 1640: 1641: /** 1642: * Returns a String representation of a character array. Subsequent 1643: * changes to the array do not affect the String. 1644: * 1645: * @param data the character array 1646: * @return a String containing the same character sequence as data 1647: * @throws NullPointerException if data is null 1648: * @see #valueOf(char[], int, int) 1649: * @see #String(char[]) 1650: */ 1651: public static String valueOf(char[] data) 1652: { 1653: return valueOf (data, 0, data.length); 1654: } 1655: 1656: /** 1657: * Returns a String representing the character sequence of the char array, 1658: * starting at the specified offset, and copying chars up to the specified 1659: * count. Subsequent changes to the array do not affect the String. 1660: * 1661: * @param data character array 1662: * @param offset position (base 0) to start copying out of data 1663: * @param count the number of characters from data to copy 1664: * @return String containing the chars from data[offset..offset+count] 1665: * @throws NullPointerException if data is null 1666: * @throws IndexOutOfBoundsException if (offset < 0 || count < 0 1667: * || offset + count > data.length) 1668: * (while unspecified, this is a StringIndexOutOfBoundsException) 1669: * @see #String(char[], int, int) 1670: */ 1671: public static String valueOf(char[] data, int offset, int count) 1672: { 1673: return new String(data, offset, count, false); 1674: } 1675: 1676: /** 1677: * Returns a String representing the character sequence of the char array, 1678: * starting at the specified offset, and copying chars up to the specified 1679: * count. Subsequent changes to the array do not affect the String. 1680: * 1681: * @param data character array 1682: * @param offset position (base 0) to start copying out of data 1683: * @param count the number of characters from data to copy 1684: * @return String containing the chars from data[offset..offset+count] 1685: * @throws NullPointerException if data is null 1686: * @throws IndexOutOfBoundsException if (offset < 0 || count < 0 1687: * || offset + count < 0 (overflow) 1688: * || offset + count < 0 (overflow) 1689: * || offset + count > data.length) 1690: * (while unspecified, this is a StringIndexOutOfBoundsException) 1691: * @see #String(char[], int, int) 1692: */ 1693: public static String copyValueOf(char[] data, int offset, int count) 1694: { 1695: return new String(data, offset, count, false); 1696: } 1697: 1698: /** 1699: * Returns a String representation of a character array. Subsequent 1700: * changes to the array do not affect the String. 1701: * 1702: * @param data the character array 1703: * @return a String containing the same character sequence as data 1704: * @throws NullPointerException if data is null 1705: * @see #copyValueOf(char[], int, int) 1706: * @see #String(char[]) 1707: */ 1708: public static String copyValueOf(char[] data) 1709: { 1710: return copyValueOf (data, 0, data.length); 1711: } 1712: 1713: /** 1714: * Returns a String representing a boolean. 1715: * 1716: * @param b the boolean 1717: * @return "true" if b is true, else "false" 1718: */ 1719: public static String valueOf(boolean b) 1720: { 1721: return b ? "true" : "false"; 1722: } 1723: 1724: /** 1725: * Returns a String representing a character. 1726: * 1727: * @param c the character 1728: * @return String containing the single character c 1729: */ 1730: public static String valueOf(char c) 1731: { 1732: // Package constructor avoids an array copy. 1733: return new String(new char[] { c }, 0, 1, true); 1734: } 1735: 1736: /** 1737: * Returns a String representing an integer. 1738: * 1739: * @param i the integer 1740: * @return String containing the integer in base 10 1741: * @see Integer#toString(int) 1742: */ 1743: public static String valueOf(int i) 1744: { 1745: // See Integer to understand why we call the two-arg variant. 1746: return Integer.toString(i, 10); 1747: } 1748: 1749: /** 1750: * Returns a String representing a long. 1751: * 1752: * @param l the long 1753: * @return String containing the long in base 10 1754: * @see Long#toString(long) 1755: */ 1756: public static String valueOf(long l) 1757: { 1758: return Long.toString(l); 1759: } 1760: 1761: /** 1762: * Returns a String representing a float. 1763: * 1764: * @param f the float 1765: * @return String containing the float 1766: * @see Float#toString(float) 1767: */ 1768: public static String valueOf(float f) 1769: { 1770: return Float.toString(f); 1771: } 1772: 1773: /** 1774: * Returns a String representing a double. 1775: * 1776: * @param d the double 1777: * @return String containing the double 1778: * @see Double#toString(double) 1779: */ 1780: public static String valueOf(double d) 1781: { 1782: return Double.toString(d); 1783: } 1784: 1785: 1786: /** @since 1.5 */ 1787: public static String format(Locale locale, String format, Object... args) 1788: { 1789: Formatter f = new Formatter(locale); 1790: return f.format(format, args).toString(); 1791: } 1792: 1793: /** @since 1.5 */ 1794: public static String format(String format, Object... args) 1795: { 1796: return format(Locale.getDefault(), format, args); 1797: } 1798: 1799: /** 1800: * If two Strings are considered equal, by the equals() method, 1801: * then intern() will return the same String instance. ie. 1802: * if (s1.equals(s2)) then (s1.intern() == s2.intern()). 1803: * All string literals and string-valued constant expressions 1804: * are already interned. 1805: * 1806: * @return the interned String 1807: */ 1808: public String intern() 1809: { 1810: return VMString.intern(this); 1811: } 1812: 1813: /** 1814: * Return the number of code points between two indices in the 1815: * <code>String</code>. An unpaired surrogate counts as a 1816: * code point for this purpose. Characters outside the indicated 1817: * range are not examined, even if the range ends in the middle of a 1818: * surrogate pair. 1819: * 1820: * @param start the starting index 1821: * @param end one past the ending index 1822: * @return the number of code points 1823: * @since 1.5 1824: */ 1825: public synchronized int codePointCount(int start, int end) 1826: { 1827: if (start < 0 || end > count || start > end) 1828: throw new StringIndexOutOfBoundsException(); 1829: 1830: start += offset; 1831: end += offset; 1832: int count = 0; 1833: while (start < end) 1834: { 1835: char base = value[start]; 1836: if (base < Character.MIN_HIGH_SURROGATE 1837: || base > Character.MAX_HIGH_SURROGATE 1838: || start == end 1839: || start == count 1840: || value[start + 1] < Character.MIN_LOW_SURROGATE 1841: || value[start + 1] > Character.MAX_LOW_SURROGATE) 1842: { 1843: // Nothing. 1844: } 1845: else 1846: { 1847: // Surrogate pair. 1848: ++start; 1849: } 1850: ++start; 1851: ++count; 1852: } 1853: return count; 1854: } 1855: 1856: /** 1857: * Helper function used to detect which characters have a multi-character 1858: * uppercase expansion. Note that this is only used in locations which 1859: * track one-to-many capitalization (java.lang.Character does not do this). 1860: * As of Unicode 3.0.0, the result is limited in the range 0 to 2, as the 1861: * longest uppercase expansion is three characters (a growth of 2 from the 1862: * lowercase character). 1863: * 1864: * @param ch the char to check 1865: * @return the number of characters to add when converting to uppercase 1866: * @see CharData#DIRECTION 1867: * @see CharData#UPPER_SPECIAL 1868: * @see #toUpperCase(Locale) 1869: */ 1870: private static int upperCaseExpansion(char ch) 1871: { 1872: return Character.direction[0][Character.readCodePoint((int)ch) >> 7] & 3; 1873: } 1874: 1875: /** 1876: * Helper function used to locate the offset in upperExpand given a 1877: * character with a multi-character expansion. The binary search is 1878: * optimized under the assumption that this method will only be called on 1879: * characters which exist in upperSpecial. 1880: * 1881: * @param ch the char to check 1882: * @return the index where its expansion begins 1883: * @see CharData#UPPER_SPECIAL 1884: * @see CharData#UPPER_EXPAND 1885: * @see #toUpperCase(Locale) 1886: */ 1887: private static int upperCaseIndex(char ch) 1888: { 1889: // Simple binary search for the correct character. 1890: int low = 0; 1891: int hi = upperSpecial.length - 2; 1892: int mid = ((low + hi) >> 2) << 1; 1893: char c = upperSpecial[mid]; 1894: while (ch != c) 1895: { 1896: if (ch < c) 1897: hi = mid - 2; 1898: else 1899: low = mid + 2; 1900: mid = ((low + hi) >> 2) << 1; 1901: c = upperSpecial[mid]; 1902: } 1903: return upperSpecial[mid + 1]; 1904: } 1905: 1906: /** 1907: * Returns the value array of the given string if it is zero based or a 1908: * copy of it that is zero based (stripping offset and making length equal 1909: * to count). Used for accessing the char[]s of gnu.java.lang.CharData. 1910: * Package private for use in Character. 1911: */ 1912: static char[] zeroBasedStringValue(String s) 1913: { 1914: char[] value; 1915: 1916: if (s.offset == 0 && s.count == s.value.length) 1917: value = s.value; 1918: else 1919: { 1920: int count = s.count; 1921: value = new char[count]; 1922: VMSystem.arraycopy(s.value, s.offset, value, 0, count); 1923: } 1924: 1925: return value; 1926: } 1927: 1928: /** 1929: * Returns true iff this String contains the sequence of Characters 1930: * described in s. 1931: * @param s the CharSequence 1932: * @return true iff this String contains s 1933: * 1934: * @since 1.5 1935: */ 1936: public boolean contains (CharSequence s) 1937: { 1938: return this.indexOf(s.toString()) != -1; 1939: } 1940: 1941: /** 1942: * Returns a string that is this string with all instances of the sequence 1943: * represented by <code>target</code> replaced by the sequence in 1944: * <code>replacement</code>. 1945: * @param target the sequence to be replaced 1946: * @param replacement the sequence used as the replacement 1947: * @return the string constructed as above 1948: */ 1949: public String replace (CharSequence target, CharSequence replacement) 1950: { 1951: String targetString = target.toString(); 1952: String replaceString = replacement.toString(); 1953: int targetLength = target.length(); 1954: int replaceLength = replacement.length(); 1955: 1956: int startPos = this.indexOf(targetString); 1957: StringBuilder result = new StringBuilder(this); 1958: while (startPos != -1) 1959: { 1960: // Replace the target with the replacement 1961: result.replace(startPos, startPos + targetLength, replaceString); 1962: 1963: // Search for a new occurrence of the target 1964: startPos = result.indexOf(targetString, startPos + replaceLength); 1965: } 1966: return result.toString(); 1967: } 1968: 1969: /** 1970: * Return the index into this String that is offset from the given index by 1971: * <code>codePointOffset</code> code points. 1972: * @param index the index at which to start 1973: * @param codePointOffset the number of code points to offset 1974: * @return the index into this String that is <code>codePointOffset</code> 1975: * code points offset from <code>index</code>. 1976: * 1977: * @throws IndexOutOfBoundsException if index is negative or larger than the 1978: * length of this string. 1979: * @throws IndexOutOfBoundsException if codePointOffset is positive and the 1980: * substring starting with index has fewer than codePointOffset code points. 1981: * @throws IndexOutOfBoundsException if codePointOffset is negative and the 1982: * substring ending with index has fewer than (-codePointOffset) code points. 1983: * @since 1.5 1984: */ 1985: public int offsetByCodePoints(int index, int codePointOffset) 1986: { 1987: if (index < 0 || index > count) 1988: throw new IndexOutOfBoundsException(); 1989: 1990: return Character.offsetByCodePoints(value, offset, count, offset + index, 1991: codePointOffset); 1992: } 1993: 1994: /** 1995: * Returns true if, and only if, {@link #length()} 1996: * is <code>0</code>. 1997: * 1998: * @return true if the length of the string is zero. 1999: * @since 1.6 2000: */ 2001: public boolean isEmpty() 2002: { 2003: return count == 0; 2004: } 2005: 2006: }
GNU Classpath (0.95) |