// GNU Classpath (0.95)
// Frames | No Frames
1: /* InputStreamReader.java -- Reader than transforms bytes to chars 2: Copyright (C) 1998, 1999, 2001, 2003, 2004, 2005, 2006 3: Free Software Foundation, Inc. 4: 5: This file is part of GNU Classpath. 6: 7: GNU Classpath is free software; you can redistribute it and/or modify 8: it under the terms of the GNU General Public License as published by 9: the Free Software Foundation; either version 2, or (at your option) 10: any later version. 11: 12: GNU Classpath is distributed in the hope that it will be useful, but 13: WITHOUT ANY WARRANTY; without even the implied warranty of 14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15: General Public License for more details. 16: 17: You should have received a copy of the GNU General Public License 18: along with GNU Classpath; see the file COPYING. If not, write to the 19: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 20: 02110-1301 USA. 21: 22: Linking this library statically or dynamically with other modules is 23: making a combined work based on this library. Thus, the terms and 24: conditions of the GNU General Public License cover the whole 25: combination. 26: 27: As a special exception, the copyright holders of this library give you 28: permission to link this library with independent modules to produce an 29: executable, regardless of the license terms of these independent 30: modules, and to copy and distribute the resulting executable under 31: terms of your choice, provided that you also meet, for each linked 32: independent module, the terms and conditions of the license of that 33: module. An independent module is a module which is not derived from 34: or based on this library. If you modify this library, you may extend 35: this exception to your version of the library, but you are not 36: obligated to do so. If you do not wish to do so, delete this 37: exception statement from your version. 
*/ 38: 39: 40: package java.io; 41: 42: import gnu.classpath.SystemProperties; 43: import gnu.java.nio.charset.EncodingHelper; 44: 45: import java.nio.ByteBuffer; 46: import java.nio.CharBuffer; 47: import java.nio.charset.Charset; 48: import java.nio.charset.CharsetDecoder; 49: import java.nio.charset.CoderResult; 50: import java.nio.charset.CodingErrorAction; 51: 52: /** 53: * This class reads characters from a byte input stream. The characters 54: * read are converted from bytes in the underlying stream by a 55: * decoding layer. The decoding layer transforms bytes to chars according 56: * to an encoding standard. There are many available encodings to choose 57: * from. The desired encoding can either be specified by name, or if no 58: * encoding is selected, the system default encoding will be used. The 59: * system default encoding name is determined from the system property 60: * <code>file.encoding</code>. The only encodings that are guaranteed to 61: * be availalbe are "8859_1" (the Latin-1 character set) and "UTF8". 62: * Unforunately, Java does not provide a mechanism for listing the 63: * ecodings that are supported in a given implementation. 64: * <p> 65: * Here is a list of standard encoding names that may be available: 66: * <p> 67: * <ul> 68: * <li>8859_1 (ISO-8859-1/Latin-1)</li> 69: * <li>8859_2 (ISO-8859-2/Latin-2)</li> 70: * <li>8859_3 (ISO-8859-3/Latin-3)</li> 71: * <li>8859_4 (ISO-8859-4/Latin-4)</li> 72: * <li>8859_5 (ISO-8859-5/Latin-5)</li> 73: * <li>8859_6 (ISO-8859-6/Latin-6)</li> 74: * <li>8859_7 (ISO-8859-7/Latin-7)</li> 75: * <li>8859_8 (ISO-8859-8/Latin-8)</li> 76: * <li>8859_9 (ISO-8859-9/Latin-9)</li> 77: * <li>ASCII (7-bit ASCII)</li> 78: * <li>UTF8 (UCS Transformation Format-8)</li> 79: * <li>More later</li> 80: * </ul> 81: * <p> 82: * It is recommended that applications do not use 83: * <code>InputStreamReader</code>'s 84: * directly. 
Rather, for efficiency purposes, an object of this class 85: * should be wrapped by a <code>BufferedReader</code>. 86: * <p> 87: * Due to a deficiency the Java class library design, there is no standard 88: * way for an application to install its own byte-character encoding. 89: * 90: * @see BufferedReader 91: * @see InputStream 92: * 93: * @author Robert Schuster 94: * @author Aaron M. Renn (arenn@urbanophile.com) 95: * @author Per Bothner (bothner@cygnus.com) 96: * @date April 22, 1998. 97: */ 98: public class InputStreamReader extends Reader 99: { 100: /** 101: * The input stream. 102: */ 103: private InputStream in; 104: 105: /** 106: * The charset decoder. 107: */ 108: private CharsetDecoder decoder; 109: 110: /** 111: * End of stream reached. 112: */ 113: private boolean isDone = false; 114: 115: /** 116: * Need this. 117: */ 118: private float maxBytesPerChar; 119: 120: /** 121: * Buffer holding surplus loaded bytes (if any) 122: */ 123: private ByteBuffer byteBuffer; 124: 125: /** 126: * java.io canonical name of the encoding. 127: */ 128: private String encoding; 129: 130: /** 131: * We might decode to a 2-char UTF-16 surrogate, which won't fit in the 132: * output buffer. In this case we need to save the surrogate char. 133: */ 134: private char savedSurrogate; 135: private boolean hasSavedSurrogate = false; 136: 137: /** 138: * A byte array to be reused in read(byte[], int, int). 139: */ 140: private byte[] bytesCache; 141: 142: /** 143: * Locks the bytesCache above in read(byte[], int, int). 144: */ 145: private Object cacheLock = new Object(); 146: 147: /** 148: * This method initializes a new instance of <code>InputStreamReader</code> 149: * to read from the specified stream using the default encoding. 
150: * 151: * @param in The <code>InputStream</code> to read from 152: */ 153: public InputStreamReader(InputStream in) 154: { 155: if (in == null) 156: throw new NullPointerException(); 157: this.in = in; 158: try 159: { 160: encoding = SystemProperties.getProperty("file.encoding"); 161: // Don't use NIO if avoidable 162: if(EncodingHelper.isISOLatin1(encoding)) 163: { 164: encoding = "ISO8859_1"; 165: maxBytesPerChar = 1f; 166: decoder = null; 167: return; 168: } 169: Charset cs = EncodingHelper.getCharset(encoding); 170: decoder = cs.newDecoder(); 171: encoding = EncodingHelper.getOldCanonical(cs.name()); 172: try { 173: maxBytesPerChar = cs.newEncoder().maxBytesPerChar(); 174: } catch(UnsupportedOperationException _){ 175: maxBytesPerChar = 1f; 176: } 177: decoder.onMalformedInput(CodingErrorAction.REPLACE); 178: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 179: decoder.reset(); 180: } catch(RuntimeException e) { 181: encoding = "ISO8859_1"; 182: maxBytesPerChar = 1f; 183: decoder = null; 184: } catch(UnsupportedEncodingException e) { 185: encoding = "ISO8859_1"; 186: maxBytesPerChar = 1f; 187: decoder = null; 188: } 189: } 190: 191: /** 192: * This method initializes a new instance of <code>InputStreamReader</code> 193: * to read from the specified stream using a caller supplied character 194: * encoding scheme. Note that due to a deficiency in the Java language 195: * design, there is no way to determine which encodings are supported. 196: * 197: * @param in The <code>InputStream</code> to read from 198: * @param encoding_name The name of the encoding scheme to use 199: * 200: * @exception UnsupportedEncodingException If the encoding scheme 201: * requested is not available. 
202: */ 203: public InputStreamReader(InputStream in, String encoding_name) 204: throws UnsupportedEncodingException 205: { 206: if (in == null 207: || encoding_name == null) 208: throw new NullPointerException(); 209: 210: this.in = in; 211: // Don't use NIO if avoidable 212: if(EncodingHelper.isISOLatin1(encoding_name)) 213: { 214: encoding = "ISO8859_1"; 215: maxBytesPerChar = 1f; 216: decoder = null; 217: return; 218: } 219: try { 220: Charset cs = EncodingHelper.getCharset(encoding_name); 221: try { 222: maxBytesPerChar = cs.newEncoder().maxBytesPerChar(); 223: } catch(UnsupportedOperationException _){ 224: maxBytesPerChar = 1f; 225: } 226: 227: decoder = cs.newDecoder(); 228: decoder.onMalformedInput(CodingErrorAction.REPLACE); 229: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 230: decoder.reset(); 231: 232: // The encoding should be the old name, if such exists. 233: encoding = EncodingHelper.getOldCanonical(cs.name()); 234: } catch(RuntimeException e) { 235: encoding = "ISO8859_1"; 236: maxBytesPerChar = 1f; 237: decoder = null; 238: } 239: } 240: 241: /** 242: * Creates an InputStreamReader that uses a decoder of the given 243: * charset to decode the bytes in the InputStream into 244: * characters. 245: * 246: * @since 1.4 247: */ 248: public InputStreamReader(InputStream in, Charset charset) { 249: if (in == null) 250: throw new NullPointerException(); 251: this.in = in; 252: decoder = charset.newDecoder(); 253: 254: try { 255: maxBytesPerChar = charset.newEncoder().maxBytesPerChar(); 256: } catch(UnsupportedOperationException _){ 257: maxBytesPerChar = 1f; 258: } 259: 260: decoder.onMalformedInput(CodingErrorAction.REPLACE); 261: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 262: decoder.reset(); 263: encoding = EncodingHelper.getOldCanonical(charset.name()); 264: } 265: 266: /** 267: * Creates an InputStreamReader that uses the given charset decoder 268: * to decode the bytes in the InputStream into characters. 
269: * 270: * @since 1.4 271: */ 272: public InputStreamReader(InputStream in, CharsetDecoder decoder) { 273: if (in == null) 274: throw new NullPointerException(); 275: this.in = in; 276: this.decoder = decoder; 277: 278: Charset charset = decoder.charset(); 279: try { 280: if (charset == null) 281: maxBytesPerChar = 1f; 282: else 283: maxBytesPerChar = charset.newEncoder().maxBytesPerChar(); 284: } catch(UnsupportedOperationException _){ 285: maxBytesPerChar = 1f; 286: } 287: 288: decoder.onMalformedInput(CodingErrorAction.REPLACE); 289: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 290: decoder.reset(); 291: if (charset == null) 292: encoding = "US-ASCII"; 293: else 294: encoding = EncodingHelper.getOldCanonical(decoder.charset().name()); 295: } 296: 297: /** 298: * This method closes this stream, as well as the underlying 299: * <code>InputStream</code>. 300: * 301: * @exception IOException If an error occurs 302: */ 303: public void close() throws IOException 304: { 305: synchronized (lock) 306: { 307: // Makes sure all intermediate data is released by the decoder. 308: if (decoder != null) 309: decoder.reset(); 310: if (in != null) 311: in.close(); 312: in = null; 313: isDone = true; 314: decoder = null; 315: } 316: } 317: 318: /** 319: * This method returns the name of the encoding that is currently in use 320: * by this object. If the stream has been closed, this method is allowed 321: * to return <code>null</code>. 322: * 323: * @return The current encoding name 324: */ 325: public String getEncoding() 326: { 327: return in != null ? encoding : null; 328: } 329: 330: /** 331: * This method checks to see if the stream is ready to be read. It 332: * will return <code>true</code> if is, or <code>false</code> if it is not. 333: * If the stream is not ready to be read, it could (although is not required 334: * to) block on the next read attempt. 
335: * 336: * @return <code>true</code> if the stream is ready to be read, 337: * <code>false</code> otherwise 338: * 339: * @exception IOException If an error occurs 340: */ 341: public boolean ready() throws IOException 342: { 343: if (in == null) 344: throw new IOException("Reader has been closed"); 345: 346: return in.available() != 0; 347: } 348: 349: /** 350: * This method reads up to <code>length</code> characters from the stream into 351: * the specified array starting at index <code>offset</code> into the 352: * array. 353: * 354: * @param buf The character array to recieve the data read 355: * @param offset The offset into the array to start storing characters 356: * @param length The requested number of characters to read. 357: * 358: * @return The actual number of characters read, or -1 if end of stream. 359: * 360: * @exception IOException If an error occurs 361: */ 362: public int read(char[] buf, int offset, int length) throws IOException 363: { 364: if (in == null) 365: throw new IOException("Reader has been closed"); 366: if (isDone) 367: return -1; 368: if(decoder != null) 369: { 370: int totalBytes = (int)((double) length * maxBytesPerChar); 371: if (byteBuffer != null) 372: totalBytes = Math.max(totalBytes, byteBuffer.remaining()); 373: byte[] bytes; 374: // Fetch cached bytes array if available and big enough. 
375: synchronized(cacheLock) 376: { 377: bytes = bytesCache; 378: if (bytes == null || bytes.length < totalBytes) 379: bytes = new byte[totalBytes]; 380: else 381: bytesCache = null; 382: } 383: 384: int remaining = 0; 385: if(byteBuffer != null) 386: { 387: remaining = byteBuffer.remaining(); 388: byteBuffer.get(bytes, 0, remaining); 389: } 390: int read; 391: if(totalBytes - remaining > 0) 392: { 393: read = in.read(bytes, remaining, totalBytes - remaining); 394: if(read == -1){ 395: read = remaining; 396: isDone = true; 397: } else 398: read += remaining; 399: } else 400: read = remaining; 401: byteBuffer = ByteBuffer.wrap(bytes, 0, read); 402: CharBuffer cb = CharBuffer.wrap(buf, offset, length); 403: int startPos = cb.position(); 404: 405: if(hasSavedSurrogate){ 406: hasSavedSurrogate = false; 407: cb.put(savedSurrogate); 408: read++; 409: } 410: 411: CoderResult cr = decoder.decode(byteBuffer, cb, isDone); 412: decoder.reset(); 413: // 1 char remains which is the first half of a surrogate pair. 414: if(cr.isOverflow() && cb.hasRemaining()){ 415: CharBuffer overflowbuf = CharBuffer.allocate(2); 416: cr = decoder.decode(byteBuffer, overflowbuf, isDone); 417: overflowbuf.flip(); 418: if(overflowbuf.hasRemaining()) 419: { 420: cb.put(overflowbuf.get()); 421: savedSurrogate = overflowbuf.get(); 422: hasSavedSurrogate = true; 423: isDone = false; 424: } 425: } 426: 427: if(byteBuffer.hasRemaining()) { 428: byteBuffer.compact(); 429: byteBuffer.flip(); 430: isDone = false; 431: } else 432: byteBuffer = null; 433: 434: read = cb.position() - startPos; 435: 436: // Put cached bytes array back if we are finished and the cache 437: // is null or smaller than the used bytes array. 438: synchronized (cacheLock) 439: { 440: if (byteBuffer == null 441: && (bytesCache == null || bytesCache.length < bytes.length)) 442: bytesCache = bytes; 443: } 444: return (read <= 0) ? 
-1 : read; 445: } 446: else 447: { 448: byte[] bytes; 449: // Fetch cached bytes array if available and big enough. 450: synchronized (cacheLock) 451: { 452: bytes = bytesCache; 453: if (bytes == null || length < bytes.length) 454: bytes = new byte[length]; 455: else 456: bytesCache = null; 457: } 458: 459: int read = in.read(bytes); 460: for(int i=0;i<read;i++) 461: buf[offset+i] = (char)(bytes[i]&0xFF); 462: 463: // Put back byte array into cache if appropriate. 464: synchronized (cacheLock) 465: { 466: if (bytesCache == null || bytesCache.length < bytes.length) 467: bytesCache = bytes; 468: } 469: return read; 470: } 471: } 472: 473: /** 474: * Reads an char from the input stream and returns it 475: * as an int in the range of 0-65535. This method also will return -1 if 476: * the end of the stream has been reached. 477: * <p> 478: * This method will block until the char can be read. 479: * 480: * @return The char read or -1 if end of stream 481: * 482: * @exception IOException If an error occurs 483: */ 484: public int read() throws IOException 485: { 486: char[] buf = new char[1]; 487: int count = read(buf, 0, 1); 488: return count > 0 ? buf[0] : -1; 489: } 490: 491: /** 492: * Skips the specified number of chars in the stream. It 493: * returns the actual number of chars skipped, which may be less than the 494: * requested amount. 495: * 496: * @param count The requested number of chars to skip 497: * 498: * @return The actual number of chars skipped. 499: * 500: * @exception IOException If an error occurs 501: */ 502: public long skip(long count) throws IOException 503: { 504: if (in == null) 505: throw new IOException("Reader has been closed"); 506: 507: return super.skip(count); 508: } 509: }
// GNU Classpath (0.95)