Source for java.net.URLDecoder

   1: /* URLDecoder.java -- Class to decode URL's from encoded form.
   2:    Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: package java.net;
  39: 
  40: import java.io.UnsupportedEncodingException;
  41: 
  42: 
  43: /**
  44:  * This utility class contains static methods that converts a
  45:  * string encoded in the x-www-form-urlencoded format to the original
  46:  * text.  The x-www-form-urlencoded format replaces certain disallowed
  47:  * characters with encoded equivalents.  All upper case and lower case
  48:  * letters in the US alphabet remain as is, the space character (' ')
  49:  * is replaced with '+' sign, and all other characters are converted to a
  50:  * "%XX" format where XX is the hexadecimal representation of that character
  51:  * in a given character encoding (default is "UTF-8").
  52:  * <p>
  53:  * This method is very useful for decoding strings sent to CGI scripts
  54:  *
  55:  * Written using on-line Java Platform 1.2/1.4 API Specification.
  56:  * Status:  Believed complete and correct.
  57:  *
  58:  * @since 1.2
  59:  *
  60:  * @author Warren Levy (warrenl@cygnus.com)
  61:  * @author Aaron M. Renn (arenn@urbanophile.com) (documentation comments)
  62:  * @author Mark Wielaard (mark@klomp.org)
  63:  */
  64: public class URLDecoder
  65: {
  66:   /**
  67:    * Public contructor. Note that this class has only static methods.
  68:    */
  69:   public URLDecoder()
  70:   {
  71:   }
  72: 
  73:   /**
  74:    * This method translates the passed in string from x-www-form-urlencoded
  75:    * format using the default encoding "UTF-8" to decode the hex encoded
  76:    * unsafe characters.
  77:    *
  78:    * @param s the String to convert
  79:    *
  80:    * @return the converted String
  81:    *
  82:    * @deprecated
  83:    */
  84:   public static String decode(String s)
  85:   {
  86:     try
  87:       {
  88:     return decode(s, "UTF-8");
  89:       }
  90:     catch (UnsupportedEncodingException uee)
  91:       {
  92:     // Should never happen since UTF-8 encoding should always be supported
  93:     return s;
  94:       }
  95:   }
  96: 
  97:   /**
  98:    * This method translates the passed in string from x-www-form-urlencoded
  99:    * format using the given character encoding to decode the hex encoded
 100:    * unsafe characters.
 101:    *
 102:    * This implementation will decode the string even if it contains
 103:    * unsafe characters (characters that should have been encoded) or if the
 104:    * two characters following a % do not represent a hex encoded byte.
 105:    * In those cases the unsafe character or the % character will be added
 106:    * verbatim to the decoded result.
 107:    *
 108:    * @param s the String to convert
 109:    * @param encoding the character encoding to use the decode the hex encoded
 110:    *        unsafe characters
 111:    *
 112:    * @return the converted String
 113:    *
 114:    * @exception UnsupportedEncodingException If the named encoding is not
 115:    * supported
 116:    *
 117:    * @since 1.4
 118:    */
 119:   public static String decode(String s, String encoding)
 120:     throws UnsupportedEncodingException
 121:   {
 122:     // First convert all '+' characters to spaces.
 123:     String str = s.replace('+', ' ');
 124: 
 125:     // Then go through the whole string looking for byte encoded characters
 126:     int i;
 127:     int start = 0;
 128:     byte[] bytes = null;
 129:     int length = str.length();
 130:     StringBuffer result = new StringBuffer(length);
 131:     while ((i = str.indexOf('%', start)) >= 0)
 132:       {
 133:     // Add all non-encoded characters to the result buffer
 134:     result.append(str.substring(start, i));
 135:     start = i;
 136: 
 137:     // Get all consecutive encoded bytes
 138:     while ((i + 2 < length) && (str.charAt(i) == '%'))
 139:       i += 3;
 140: 
 141:     // Decode all these bytes
 142:     if ((bytes == null) || (bytes.length < ((i - start) / 3)))
 143:       bytes = new byte[((i - start) / 3)];
 144: 
 145:     int index = 0;
 146:     try
 147:       {
 148:         while (start < i)
 149:           {
 150:         String sub = str.substring(start + 1, start + 3);
 151:         bytes[index] = (byte) Integer.parseInt(sub, 16);
 152:         index++;
 153:         start += 3;
 154:           }
 155:       }
 156:     catch (NumberFormatException nfe)
 157:       {
 158:         // One of the hex encoded strings was bad
 159:       }
 160: 
 161:     // Add the bytes as characters according to the given encoding
 162:     result.append(new String(bytes, 0, index, encoding));
 163: 
 164:     // Make sure we skip to just after a % sign
 165:     // There might not have been enough encoded characters after the %
 166:     // or the hex chars were not actually hex chars (NumberFormatException)
 167:     if (start < length && s.charAt(start) == '%')
 168:       {
 169:         result.append('%');
 170:         start++;
 171:       }
 172:       }
 173: 
 174:     // Add any characters left
 175:     if (start < str.length())
 176:       result.append(str.substring(start));
 177: 
 178:     return result.toString();
 179:   }
 180: } // class URLDecoder