Source for java.util.zip.GZIPInputStream

   1: /* GZIPInputStream.java - Input filter for reading gzip file
   2:    Copyright (C) 1999, 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10:  
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.util.zip;
  40: 
  41: import java.io.EOFException;
  42: import java.io.IOException;
  43: import java.io.InputStream;
  44: 
  45: /**
  46:  * This filter stream is used to decompress a "GZIP" format stream. 
  47:  * The "GZIP" format is described in RFC 1952.
  48:  *
  49:  * @author John Leuner
  50:  * @author Tom Tromey
  51:  * @since JDK 1.1
  52:  */
  53: public class GZIPInputStream
  54:   extends InflaterInputStream
  55: {
  56:   /**
  57:    * The magic number found at the start of a GZIP stream.
  58:    */
  59:   public static final int GZIP_MAGIC = 0x8b1f;
  60: 
  61:   /**
  62:    * The mask for bit 0 of the flag byte.
  63:    */
  64:   static final int FTEXT = 0x1;
  65: 
  66:   /**
  67:    * The mask for bit 1 of the flag byte.
  68:    */
  69:   static final int FHCRC = 0x2;
  70: 
  71:   /**
  72:    * The mask for bit 2 of the flag byte.
  73:    */
  74:   static final int FEXTRA = 0x4;
  75: 
  76:   /**
  77:    * The mask for bit 3 of the flag byte.
  78:    */
  79:   static final int FNAME = 0x8;
  80: 
  81:   /**
  82:    * The mask for bit 4 of the flag byte.
  83:    */
  84:   static final int FCOMMENT = 0x10;
  85: 
  86:   /**
  87:    * The CRC-32 checksum value for uncompressed data.
  88:    */
  89:   protected CRC32 crc; 
  90: 
  91:   /**
  92:    * Indicates whether or not the end of the stream has been reached.
  93:    */  
  94:   protected boolean eos;
  95: 
  96:   /**
  97:    * Indicates whether or not the GZIP header has been read in.
  98:    */
  99:   private boolean readGZIPHeader;
 100: 
 101:   /**
 102:    * Creates a GZIPInputStream with the default buffer size.
 103:    *
 104:    * @param in The stream to read compressed data from 
 105:    *           (in GZIP format).
 106:    *
 107:    * @throws IOException if an error occurs during an I/O operation.
 108:    */
 109:   public GZIPInputStream(InputStream in)
 110:     throws IOException
 111:   {
 112:     this(in, 4096);
 113:   }
 114: 
 115:   /**
 116:    * Creates a GZIPInputStream with the specified buffer size.
 117:    *
 118:    * @param in The stream to read compressed data from 
 119:    *           (in GZIP format).
 120:    * @param size The size of the buffer to use.
 121:    *
 122:    * @throws IOException if an error occurs during an I/O operation.
 123:    * @throws IllegalArgumentException if <code>size</code>
 124:    * is less than or equal to 0.
 125:    */
 126:   public GZIPInputStream(InputStream in, int size)
 127:     throws IOException
 128:   {
 129:     super(in, new Inflater(true), size);
 130:     crc = new CRC32();
 131:     readHeader();
 132:   }
 133: 
 134:   /**
 135:    * Closes the input stream.
 136:    *
 137:    * @throws IOException if an error occurs during an I/O operation.
 138:    */
 139:   public void close()
 140:     throws IOException
 141:   {
 142:     // Nothing to do here.
 143:     super.close();
 144:   }
 145: 
 146:   /**
 147:    * Reads in GZIP-compressed data and stores it in uncompressed form
 148:    * into an array of bytes.  The method will block until either
 149:    * enough input data becomes available or the compressed stream
 150:    * reaches its end.
 151:    *
 152:    * @param buf the buffer into which the uncompressed data will
 153:    *            be stored.
 154:    * @param offset the offset indicating where in <code>buf</code>
 155:    *               the uncompressed data should be placed.
 156:    * @param len the number of uncompressed bytes to be read.
 157:    */
 158:   public int read(byte[] buf, int offset, int len) throws IOException
 159:   {
 160:     // We first have to slurp in the GZIP header, then we feed all the
 161:     // rest of the data to the superclass.
 162:     //
 163:     // As we do that we continually update the CRC32. Once the data is
 164:     // finished, we check the CRC32.
 165:     //
 166:     // This means we don't need our own buffer, as everything is done
 167:     // in the superclass.
 168:     if (!readGZIPHeader)
 169:       readHeader();
 170: 
 171:     if (eos)
 172:       return -1;
 173: 
 174:     //  System.err.println("GZIPIS.read(byte[], off, len ... " + offset + " and len " + len);
 175: 
 176:     /* We don't have to read the header,
 177:      * so we just grab data from the superclass.
 178:      */
 179:     int numRead = super.read(buf, offset, len);
 180:     if (numRead > 0)
 181:       crc.update(buf, offset, numRead);
 182: 
 183:     if (inf.finished())
 184:       readFooter();
 185:     return numRead;
 186:   }
 187: 
 188: 
 189:   /**
 190:    * Reads in the GZIP header.
 191:    */
 192:   private void readHeader() throws IOException
 193:   {
 194:     /* 1. Check the two magic bytes */
 195:     CRC32 headCRC = new CRC32();
 196:     int magic = in.read();
 197:     if (magic < 0)
 198:     {
 199:       eos = true;
 200:       return;
 201:     }
 202:     int magic2 = in.read();
 203:     if ((magic + (magic2 << 8)) != GZIP_MAGIC)
 204:       throw new IOException("Error in GZIP header, bad magic code");
 205:     headCRC.update(magic);
 206:     headCRC.update(magic2);
 207:     
 208:     /* 2. Check the compression type (must be 8) */
 209:     int CM = in.read();
 210:     if (CM != Deflater.DEFLATED)
 211:       throw new IOException("Error in GZIP header, data not in deflate format");
 212:     headCRC.update(CM);
 213: 
 214:     /* 3. Check the flags */
 215:     int flags = in.read();
 216:     if (flags < 0)
 217:       throw new EOFException("Early EOF in GZIP header");
 218:     headCRC.update(flags);
 219:     
 220:     /*    This flag byte is divided into individual bits as follows:
 221:       
 222:       bit 0   FTEXT
 223:       bit 1   FHCRC
 224:       bit 2   FEXTRA
 225:       bit 3   FNAME
 226:       bit 4   FCOMMENT
 227:       bit 5   reserved
 228:       bit 6   reserved
 229:       bit 7   reserved
 230:     */
 231:     
 232:     /* 3.1 Check the reserved bits are zero */    
 233:     if ((flags & 0xd0) != 0)
 234:       throw new IOException("Reserved flag bits in GZIP header != 0");
 235:     
 236:     /* 4.-6. Skip the modification time, extra flags, and OS type */
 237:     for (int i=0; i< 6; i++)
 238:     {
 239:       int readByte = in.read();
 240:       if (readByte < 0)
 241:     throw new EOFException("Early EOF in GZIP header");
 242:       headCRC.update(readByte);
 243:     }
 244:     
 245:     /* 7. Read extra field */
 246:     if ((flags & FEXTRA) != 0)
 247:     {
 248:       /* Skip subfield id */
 249:       for (int i=0; i< 2; i++)
 250:       {
 251:         int readByte = in.read();
 252:     if (readByte < 0)
 253:       throw new EOFException("Early EOF in GZIP header");
 254:     headCRC.update(readByte);
 255:       }
 256:       if (in.read() < 0 || in.read() < 0)
 257:     throw new EOFException("Early EOF in GZIP header");
 258:     
 259:       int len1, len2, extraLen;
 260:       len1 = in.read();
 261:       len2 = in.read();
 262:       if ((len1 < 0) || (len2 < 0))
 263:     throw new EOFException("Early EOF in GZIP header");
 264:       headCRC.update(len1);
 265:       headCRC.update(len2);
 266: 
 267:       extraLen = (len1 << 8) | len2;
 268:       for (int i = 0; i < extraLen;i++)
 269:       {
 270:     int readByte = in.read();
 271:     if (readByte < 0)
 272:       throw new EOFException("Early EOF in GZIP header");
 273:     headCRC.update(readByte);
 274:       }
 275:     }
 276:     
 277:     /* 8. Read file name */
 278:     if ((flags & FNAME) != 0)
 279:     {
 280:       int readByte;
 281:       while ( (readByte = in.read()) > 0)
 282:     headCRC.update(readByte);
 283:       if (readByte < 0)
 284:     throw new EOFException("Early EOF in GZIP file name");
 285:       headCRC.update(readByte);
 286:     }
 287: 
 288:     /* 9. Read comment */
 289:     if ((flags & FCOMMENT) != 0)
 290:     {
 291:       int readByte;
 292:       while ( (readByte = in.read()) > 0)
 293:         headCRC.update(readByte);
 294: 
 295:       if (readByte < 0)
 296:         throw new EOFException("Early EOF in GZIP comment");
 297:       headCRC.update(readByte);
 298:     }
 299:     
 300:     /* 10. Read header CRC */
 301:     if ((flags & FHCRC) != 0)
 302:     {
 303:       int tempByte;
 304:       int crcval = in.read();
 305:       if (crcval < 0)
 306:         throw new EOFException("Early EOF in GZIP header");
 307:     
 308:       tempByte = in.read();
 309:       if (tempByte < 0)
 310:         throw new EOFException("Early EOF in GZIP header");
 311:     
 312:       crcval = (crcval << 8) | tempByte;
 313:       if (crcval != ((int) headCRC.getValue() & 0xffff))
 314:         throw new IOException("Header CRC value mismatch");
 315:     }
 316:     
 317:     readGZIPHeader = true;
 318:     //System.err.println("Read GZIP header");
 319:   }
 320: 
 321:   private void readFooter() throws IOException
 322:   {
 323:     byte[] footer = new byte[8];
 324:     int avail = inf.getRemaining();
 325:     if (avail > 8)
 326:       avail = 8;
 327:     System.arraycopy(buf, len - inf.getRemaining(), footer, 0, avail);
 328:     int needed = 8 - avail;
 329:     while (needed > 0)
 330:     {
 331:       int count = in.read(footer, 8-needed, needed);
 332:       if (count <= 0)
 333:     throw new EOFException("Early EOF in GZIP footer");
 334:       needed -= count; //Jewel Jan 16
 335:     }
 336: 
 337:     int crcval = (footer[0] & 0xff) | ((footer[1] & 0xff) << 8)
 338:       | ((footer[2] & 0xff) << 16) | (footer[3] << 24);
 339:     if (crcval != (int) crc.getValue())
 340:       throw new IOException("GZIP crc sum mismatch, theirs \""
 341:                 + Integer.toHexString(crcval)
 342:                 + "\" and ours \""
 343:                 + Integer.toHexString( (int) crc.getValue()));
 344: 
 345:     int total = (footer[4] & 0xff) | ((footer[5] & 0xff) << 8)
 346:       | ((footer[6] & 0xff) << 16) | (footer[7] << 24);
 347:     if (total != inf.getTotalOut())
 348:       throw new IOException("Number of bytes mismatch");
 349: 
 350:     /* FIXME" XXX Should we support multiple members.
 351:      * Difficult, since there may be some bytes still in buf
 352:      */
 353:     eos = true;
 354:   }
 355: }