Source for java.net.URI

   1: /* URI.java -- An URI class
   2:    Copyright (C) 2002, 2004, 2005, 2006  Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.net;
  40: 
  41: import java.io.IOException;
  42: import java.io.ObjectInputStream;
  43: import java.io.ObjectOutputStream;
  44: import java.io.Serializable;
  45: import java.util.regex.Matcher;
  46: import java.util.regex.Pattern;
  47: 
  48: /**
  49:  * <p>
  50:  * A URI instance represents that defined by 
  51:  * <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC3986</a>,
  52:  * with some deviations.
  53:  * </p>
  54:  * <p>
  55:  * At its highest level, a URI consists of:
  56:  * </p>
  57:  * <p><code>[<em>scheme</em><strong>:</strong>]<em>scheme-specific-part</em>
  58:  * [<strong>#</strong><em>fragment</em>]</code>
  59:  * </p>
  60:  * <p>
  61:  * where <strong>#</strong> and <strong>:</strong> are literal characters,
  62:  * and those parts enclosed in square brackets are optional.
  63:  * </p>
  64:  * <p>
  65:  * There are two main types of URI.  An <em>opaque</em> URI is one
  66:  * which just consists of the above three parts, and is not further
  67:  * defined.  An example of such a URI would be <em>mailto:</em> URI.
  68:  * In contrast, <em>hierarchical</em> URIs give further definition
  69:  * to the scheme-specific part, so as to represent some part of a hierarchical
  70:  * structure.
  71:  * </p>
  72:  * <p>
  73:  * <code>[<strong>//</strong><em>authority</em>][<em>path</em>]
  74:  * [<strong>?</strong><em>query</em>]</code>
  75:  * </p>
  76:  * <p>
  77:  * with <strong>/</strong> and <strong>?</strong> being literal characters.
  78:  * When server-based, the authority section is further subdivided into:
  79:  * </p>
  80:  * <p>
  81:  * <code>[<em>user-info</em><strong>@</strong>]<em>host</em>
  82:  * [<strong>:</strong><em>port</em>]</code>
  83:  * </p>
  84:  * <p>
  85:  * with <strong>@</strong> and <strong>:</strong> as literal characters.
  86:  * Authority sections that are not server-based are said to be registry-based.
  87:  * </p>
  88:  * <p>
  89:  * Hierarchical URIs can be either relative or absolute.  An absolute one
  90:  * has a scheme and a scheme-specific part starting with `<strong>/</strong>',
  91:  * while a relative one has no scheme.  Opaque URIs are always absolute.
  92:  * </p>
  93:  * <p>
  94:  * Each part of the URI may have one of three states: undefined, empty
  95:  * or containing some content.  The former two of these are represented
  96:  * by <code>null</code> and the empty string in Java, respectively.
  97:  * The scheme-specific part may never be undefined.  It also follows from
  98:  * this that the path sub-part may also not be undefined, so as to ensure
  99:  * the former.
 100:  * </p>
 101:  * <h2>Character Escaping and Quoting</h2>
 102:  * <p>
 103:  * The characters that can be used within a valid URI are restricted.
 104:  * There are two main classes of characters which can't be used as is
 105:  * within the URI:
 106:  * </p>
 107:  * <ol>
 108:  * <li><strong>Characters outside the US-ASCII character set</strong>.
 109:  * These have to be <strong>escaped</strong> in order to create
 110:  * an RFC-compliant URI; this means replacing the character with the
 111:  * appropriate hexadecimal value, preceded by a `%'.</li>
 112:  * <li><strong>Illegal characters</strong> (e.g. space characters,
 113:  * control characters) are quoted, which results in them being encoded
 114:  * in the same way as non-US-ASCII characters.</li>
 115:  * </ol>
 116:  * <p>
 117:  * The set of valid characters differs depending on the section of the URI:
 118:  * </p>
 119:  * <ul>
 120:  * <li><strong>Scheme</strong>: Must be an alphanumeric, `-', `.' or '+'.</li>
 121:  * <li><strong>Authority</strong>:Composed of the username, host, port, `@'
 122:  * and `:'.</li>
 123:  * <li><strong>Username</strong>: Allows unreserved or percent-encoded
 124:  * characters, sub-delimiters and `:'.</li>
 125:  * <li><strong>Host</strong>: Allows unreserved or percent-encoded
 126:  * characters, sub-delimiters and square brackets (`[' and `]') for IPv6
 127:  * addresses.</li>
 128:  * <li><strong>Port</strong>: Digits only.</li>
 129:  * <li><strong>Path</strong>: Allows the path characters and `/'.
 130:  * <li><strong>Query</strong>: Allows the path characters, `?' and '/'.
 131:  * <li><strong>Fragment</strong>: Allows the path characters, `?' and '/'.
 132:  * </ul>
 133:  * <p>
 134:  * These definitions reference the following sets of characters:
 135:  * </p>
 136:  * <ul>
 137:  * <li><strong>Unreserved characters</strong>: The alphanumerics plus
 138:  * `-', `.', `_', and `~'.</li>
 139:  * <li><strong>Sub-delimiters</strong>: `!', `$', `&', `(', `)', `*',
 140:  * `+', `,', `;', `=' and the single-quote itself.</li>
 141:  * <li><strong>Path characters</strong>: Unreserved and percent-encoded
 142:  * characters and the sub-delimiters along with `@' and `:'.</li>
 143:  * </ul>
 144:  * <p>
 145:  * The constructors and accessor methods allow the use and retrieval of
 146:  * URI components which contain non-US-ASCII characters directly.
 147:  * They are only escaped when the <code>toASCIIString()</code> method
 148:  * is used.  In contrast, illegal characters are always quoted, with the
 149:  * exception of the return values of the non-raw accessors.
 150:  * </p>
 151:  *
 152:  * @author Ito Kazumitsu (ito.kazumitsu@hitachi-cable.co.jp)
 153:  * @author Dalibor Topic (robilad@kaffe.org)
 154:  * @author Michael Koch (konqueror@gmx.de)
 155:  * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
 156:  * @since 1.4
 157:  */
 158: public final class URI 
 159:   implements Comparable<URI>, Serializable
 160: {
 161:   /**
  162:    * For serialization compatibility.
 163:    */
 164:   static final long serialVersionUID = -6052424284110960213L;
 165: 
 166:   /**
 167:    * Regular expression for parsing URIs.
 168:    *
 169:    * Taken from RFC 2396, Appendix B.
 170:    * This expression doesn't parse IPv6 addresses.
 171:    */
 172:   private static final String URI_REGEXP =
 173:     "^(([^:/?#]+):)?((//([^/?#]*))?([^?#]*)(\\?([^#]*))?)?(#(.*))?";
 174: 
 175:   /**
 176:    * Regular expression for parsing the authority segment.
 177:    */
 178:   private static final String AUTHORITY_REGEXP =
 179:     "(([^?#]*)@)?([^?#:]*)(:([0-9]*))?";
 180: 
 181:   /**
 182:    * Valid characters (taken from rfc2396/3986)
 183:    */
 184:   private static final String RFC2396_DIGIT = "0123456789";
 185:   private static final String RFC2396_LOWALPHA = "abcdefghijklmnopqrstuvwxyz";
 186:   private static final String RFC2396_UPALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
 187:   private static final String RFC2396_ALPHA =
 188:     RFC2396_LOWALPHA + RFC2396_UPALPHA;
 189:   private static final String RFC2396_ALPHANUM = RFC2396_DIGIT + RFC2396_ALPHA;
 190:   private static final String RFC3986_UNRESERVED = RFC2396_ALPHANUM + "-._~";
 191:   private static final String RFC3986_SUBDELIMS = "!$&'()*+,;=";
 192:   private static final String RFC3986_REG_NAME =
 193:     RFC3986_UNRESERVED + RFC3986_SUBDELIMS + "%";
 194:   private static final String RFC3986_PCHAR = RFC3986_UNRESERVED + 
 195:     RFC3986_SUBDELIMS + ":@%";
 196:   private static final String RFC3986_SEGMENT = RFC3986_PCHAR;
 197:   private static final String RFC3986_PATH_SEGMENTS = RFC3986_SEGMENT + "/";
 198:   private static final String RFC3986_SSP = RFC3986_PCHAR + "?/";
 199:   private static final String RFC3986_HOST = RFC3986_REG_NAME + "[]";
 200:   private static final String RFC3986_USERINFO = RFC3986_REG_NAME + ":";
 201: 
 202:   /**
 203:    * Index of scheme component in parsed URI.
 204:    */
 205:   private static final int SCHEME_GROUP = 2;
 206: 
 207:   /**
 208:    * Index of scheme-specific-part in parsed URI.
 209:    */
 210:   private static final int SCHEME_SPEC_PART_GROUP = 3;
 211: 
 212:   /**
 213:    * Index of authority component in parsed URI.
 214:    */
 215:   private static final int AUTHORITY_GROUP = 5;
 216: 
 217:   /**
 218:    * Index of path component in parsed URI.
 219:    */
 220:   private static final int PATH_GROUP = 6;
 221: 
 222:   /**
 223:    * Index of query component in parsed URI.
 224:    */
 225:   private static final int QUERY_GROUP = 8;
 226: 
 227:   /**
 228:    * Index of fragment component in parsed URI.
 229:    */
 230:   private static final int FRAGMENT_GROUP = 10;
 231:   
 232:   /**
 233:    * Index of userinfo component in parsed authority section.
 234:    */
 235:   private static final int AUTHORITY_USERINFO_GROUP = 2;
 236: 
 237:   /**
 238:    * Index of host component in parsed authority section.
 239:    */
 240:   private static final int AUTHORITY_HOST_GROUP = 3;
 241: 
 242:   /**
 243:    * Index of port component in parsed authority section.
 244:    */
 245:   private static final int AUTHORITY_PORT_GROUP = 5;
 246: 
 247:   /**
 248:    * The compiled version of the URI regular expression.
 249:    */
 250:   private static final Pattern URI_PATTERN;
 251: 
 252:   /**
 253:    * The compiled version of the authority regular expression.
 254:    */
 255:   private static final Pattern AUTHORITY_PATTERN;
 256: 
 257:   /**
 258:    * The set of valid hexadecimal characters.
 259:    */
 260:   private static final String HEX = "0123456789ABCDEF";
 261: 
 262:   private transient String scheme;
 263:   private transient String rawSchemeSpecificPart;
 264:   private transient String schemeSpecificPart;
 265:   private transient String rawAuthority;
 266:   private transient String authority;
 267:   private transient String rawUserInfo;
 268:   private transient String userInfo;
 269:   private transient String rawHost;
 270:   private transient String host;
 271:   private transient int port = -1;
 272:   private transient String rawPath;
 273:   private transient String path;
 274:   private transient String rawQuery;
 275:   private transient String query;
 276:   private transient String rawFragment;
 277:   private transient String fragment;
 278:   private String string;
 279: 
 280:   /**
 281:    * Static initializer to pre-compile the regular expressions.
 282:    */
 283:   static
 284:   {
 285:     URI_PATTERN = Pattern.compile(URI_REGEXP);
 286:     AUTHORITY_PATTERN = Pattern.compile(AUTHORITY_REGEXP);
 287:   }
 288: 
 289:   private void readObject(ObjectInputStream is)
 290:     throws ClassNotFoundException, IOException
 291:   {
 292:     this.string = (String) is.readObject();
 293:     try
 294:       {
 295:     parseURI(this.string);
 296:       }
 297:     catch (URISyntaxException x)
 298:       {
 299:     // Should not happen.
 300:     throw new RuntimeException(x);
 301:       }
 302:   }
 303: 
 304:   private void writeObject(ObjectOutputStream os) throws IOException
 305:   {
 306:     if (string == null)
 307:       string = toString(); 
 308:     os.writeObject(string);
 309:   }
 310: 
 311:   /**
 312:    * <p>
 313:    * Returns the string content of the specified group of the supplied
 314:    * matcher.  The returned value is modified according to the following:
 315:    * </p>
 316:    * <ul>
 317:    * <li>If the resulting string has a length greater than 0, then
 318:    * that string is returned.</li>
 319:    * <li>If a string of zero length, is matched, then the content
 320:    * of the preceding group is considered.  If this is also an empty
 321:    * string, then <code>null</code> is returned to indicate an undefined
 322:    * value.  Otherwise, the value is truly the empty string and this is
 323:    * the returned value.</li>
 324:    * </ul>
 325:    * <p>
 326:    * This method is used for matching against all parts of the URI
 327:    * that may be either undefined or empty (i.e. all those but the
 328:    * scheme-specific part and the path).  In each case, the preceding
 329:    * group is the content of the original group, along with some
 330:    * additional distinguishing feature.  For example, the preceding
 331:    * group for the query includes the preceding question mark,
 332:    * while that of the fragment includes the hash symbol.  The presence
 333:    * of these features enables disambiguation between the two cases
 334:    * of a completely unspecified value and a simple non-existant value.
 335:    * The scheme differs in that it will never return an empty string;
 336:    * the delimiter follows the scheme rather than preceding it, so
 337:    * it becomes part of the following section.  The same is true
 338:    * of the user information.
 339:    * </p>
 340:    *
 341:    * @param match the matcher, which contains the results of the URI
 342:    *              matched against the URI regular expression.
 343:    * @return either the matched content, <code>null</code> for undefined
 344:    *         values, or an empty string for a URI part with empty content.
 345:    */
 346:   private static String getURIGroup(Matcher match, int group)
 347:   {
 348:     String matched = match.group(group);
 349:     if (matched == null || matched.length() == 0)
 350:       {
 351:     String prevMatched = match.group(group -1);
 352:     if (prevMatched == null || prevMatched.length() == 0)
 353:       return null;
 354:     else
 355:       return "";
 356:       }
 357:     return matched;
 358:   }
 359: 
 360:   /**
 361:    * Sets fields of this URI by parsing the given string.
 362:    *
 363:    * @param str The string to parse
 364:    *
 365:    * @exception URISyntaxException If the given string violates RFC 2396
 366:    */
 367:   private void parseURI(String str) throws URISyntaxException
 368:   {
 369:     Matcher matcher = URI_PATTERN.matcher(str);
 370:     
 371:     if (matcher.matches())
 372:       {
 373:     scheme = getURIGroup(matcher, SCHEME_GROUP);
 374:     rawSchemeSpecificPart = matcher.group(SCHEME_SPEC_PART_GROUP);
 375:     schemeSpecificPart = unquote(rawSchemeSpecificPart);
 376:     if (!isOpaque())
 377:       {
 378:         rawAuthority = getURIGroup(matcher, AUTHORITY_GROUP);
 379:         rawPath = matcher.group(PATH_GROUP);
 380:         rawQuery = getURIGroup(matcher, QUERY_GROUP);
 381:       }
 382:     rawFragment = getURIGroup(matcher, FRAGMENT_GROUP);
 383:       }
 384:     else
 385:       throw new URISyntaxException(str,
 386:                    "doesn't match URI regular expression");
 387:     parseServerAuthority();
 388: 
 389:     // We must eagerly unquote the parts, because this is the only time
 390:     // we may throw an exception.
 391:     authority = unquote(rawAuthority);
 392:     userInfo = unquote(rawUserInfo);
 393:     host = unquote(rawHost);
 394:     path = unquote(rawPath);
 395:     query = unquote(rawQuery);
 396:     fragment = unquote(rawFragment);
 397:   }
 398: 
 399:   /**
 400:    * Unquote "%" + hex quotes characters
 401:    *
 402:    * @param str The string to unquote or null.
 403:    *
 404:    * @return The unquoted string or null if str was null.
 405:    *
 406:    * @exception URISyntaxException If the given string contains invalid
 407:    * escape sequences.
 408:    */
 409:   private static String unquote(String str) throws URISyntaxException
 410:   {
 411:     if (str == null)
 412:       return null;
 413:     byte[] buf = new byte[str.length()];
 414:     int pos = 0;
 415:     for (int i = 0; i < str.length(); i++)
 416:       {
 417:     char c = str.charAt(i);
 418:     if (c == '%')
 419:       {
 420:         if (i + 2 >= str.length())
 421:           throw new URISyntaxException(str, "Invalid quoted character");
 422:         int hi = Character.digit(str.charAt(++i), 16);
 423:         int lo = Character.digit(str.charAt(++i), 16);
 424:         if (lo < 0 || hi < 0)
 425:           throw new URISyntaxException(str, "Invalid quoted character");
 426:         buf[pos++] = (byte) (hi * 16 + lo);
 427:       }
 428:     else
 429:       buf[pos++] = (byte) c;
 430:       }
 431:     try
 432:       {
 433:     return new String(buf, 0, pos, "utf-8");
 434:       }
 435:     catch (java.io.UnsupportedEncodingException x2)
 436:       {
 437:     throw (Error) new InternalError().initCause(x2);
 438:       }
 439:   }
 440: 
  441:   /**
  442:    * Quotes the characters of the given string that are illegal in a URI.
  443:    *
  444:    * US-ASCII characters outside the RFC3986_SSP set are replaced
  445:    * by "%" followed by their two-digit hexadecimal code; characters
  446:    * above US-ASCII are passed through unchanged.
  447:    *
  448:    * @param str The string to quote; must not be null.
  449:    *
  450:    * @return The quoted string.
  451:    */
  452:   private static String quote(String str)
  453:   {
  454:     return quote(str, RFC3986_SSP);
  455:   }
 456: 
 457:   /**
 458:    * Quote characters illegal in URI authorities in given string.
 459:    *
 460:    * Replace illegal characters by encoding their UTF-8
 461:    * representation as "%" + hex code for each resulting
 462:    * UTF-8 character.
 463:    *
 464:    * @param str The string to quote
 465:    *
 466:    * @return The quoted string.
 467:    */
 468:   private static String quoteAuthority(String str)
 469:   {
 470:     // Technically, we should be using RFC2396_AUTHORITY, but
 471:     // it contains no additional characters.
 472:     return quote(str, RFC3986_REG_NAME);
 473:   }
 474: 
 475:   /**
 476:    * Quotes the characters in the supplied string that are not part of
 477:    * the specified set of legal characters.
 478:    *
 479:    * @param str the string to quote
 480:    * @param legalCharacters the set of legal characters
 481:    *
 482:    * @return the quoted string.
 483:    */
 484:   private static String quote(String str, String legalCharacters)
 485:   {
 486:     StringBuffer sb = new StringBuffer(str.length());
 487:     for (int i = 0; i < str.length(); i++)
 488:       {
 489:     char c = str.charAt(i);
 490:     if ((legalCharacters.indexOf(c) == -1)
 491:         && (c <= 127))
 492:       {
 493:         sb.append('%');
 494:         sb.append(HEX.charAt(c / 16));
 495:         sb.append(HEX.charAt(c % 16));
 496:       }
 497:           else
 498:       sb.append(c);
 499:       }
 500:     return sb.toString();
 501:   }
 502: 
  503:   /**
  504:    * Quotes the characters of the given string that are illegal in a host.
  505:    *
  506:    * US-ASCII characters outside the RFC3986_HOST set are replaced
  507:    * by "%" followed by their two-digit hexadecimal code; characters
  508:    * above US-ASCII are passed through unchanged.
  509:    *
  510:    * @param str The string to quote; must not be null.
  511:    *
  512:    * @return The quoted string.
  513:    */
  514:   private static String quoteHost(String str)
  515:   {
  516:     return quote(str, RFC3986_HOST);
  517:   }
 518: 
  519:   /**
  520:    * Quotes the characters of the given string that are illegal in a path.
  521:    *
  522:    * US-ASCII characters outside the RFC3986_PATH_SEGMENTS set are replaced
  523:    * by "%" followed by their two-digit hexadecimal code; characters
  524:    * above US-ASCII are passed through unchanged.
  525:    *
  526:    * @param str The string to quote; must not be null.
  527:    *
  528:    * @return The quoted string.
  529:    */
  530:   private static String quotePath(String str)
  531:   {
  532:     // Technically, we should be using RFC2396_PATH, but
  533:     // it contains no additional characters.
  534:     return quote(str, RFC3986_PATH_SEGMENTS);
  535:   }
 536: 
  537:   /**
  538:    * Quotes the characters of the given string that are illegal in user info.
  539:    *
  540:    * US-ASCII characters outside the RFC3986_USERINFO set are replaced
  541:    * by "%" followed by their two-digit hexadecimal code; characters
  542:    * above US-ASCII are passed through unchanged.
  543:    *
  544:    * @param str The string to quote; must not be null.
  545:    *
  546:    * @return The quoted string.
  547:    */
  548:   private static String quoteUserInfo(String str)
  549:   {
  550:     return quote(str, RFC3986_USERINFO);
  551:   }
 552: 
  553:   /**
  554:    * Creates a URI by parsing the given string, which is also kept as
  555:    * the string form of this URI.
  556:    * @param str The string to create the URI from
  557:    *
  558:    * @exception URISyntaxException If the string cannot be parsed as a URI
  559:    * @exception NullPointerException If str is null
  560:    */
  561:   public URI(String str) throws URISyntaxException
  562:   {
  563:     this.string = str;
  564:     parseURI(str);
  565:   }
 566: 
 567:   /**
 568:    * Create an URI from the given components
 569:    *
 570:    * @param scheme The scheme name
 571:    * @param userInfo The username and authorization info
 572:    * @param host The hostname
 573:    * @param port The port number
 574:    * @param path The path
 575:    * @param query The query
 576:    * @param fragment The fragment
 577:    *
 578:    * @exception URISyntaxException If the given string violates RFC 2396
 579:    */
 580:   public URI(String scheme, String userInfo, String host, int port,
 581:              String path, String query, String fragment)
 582:     throws URISyntaxException
 583:   {
 584:     this((scheme == null ? "" : scheme + ":")
 585:          + (userInfo == null && host == null && port == -1 ? "" : "//")
 586:          + (userInfo == null ? "" : quoteUserInfo(userInfo) + "@")
 587:          + (host == null ? "" : quoteHost(host))
 588:          + (port == -1 ? "" : ":" + String.valueOf(port))
 589:          + (path == null ? "" : quotePath(path))
 590:          + (query == null ? "" : "?" + quote(query))
 591:          + (fragment == null ? "" : "#" + quote(fragment)));
 592:   }
 593: 
 594:   /**
 595:    * Create an URI from the given components
 596:    *
 597:    * @param scheme The scheme name
 598:    * @param authority The authority
 599:    * @param path The apth
 600:    * @param query The query
 601:    * @param fragment The fragment
 602:    *
 603:    * @exception URISyntaxException If the given string violates RFC 2396
 604:    */
 605:   public URI(String scheme, String authority, String path, String query,
 606:              String fragment) throws URISyntaxException
 607:   {
 608:     this((scheme == null ? "" : scheme + ":")
 609:          + (authority == null ? "" : "//" + quoteAuthority(authority))
 610:          + (path == null ? "" : quotePath(path))
 611:          + (query == null ? "" : "?" + quote(query))
 612:          + (fragment == null ? "" : "#" + quote(fragment)));
 613:   }
 614: 
  615:   /**
  616:    * Creates a URI via the seven-argument constructor (no user info, port -1, no query).
  617:    *
  618:    * @param scheme The scheme name
  619:    * @param host The hostname
  620:    * @param path The path
  621:    * @param fragment The fragment
  622:    *
  623:    * @exception URISyntaxException If the assembled string cannot be parsed as a URI
  624:    */
  625:   public URI(String scheme, String host, String path, String fragment)
  626:     throws URISyntaxException
  627:   {
  628:     this(scheme, null, host, -1, path, null, fragment);
  629:   }
 630: 
 631:   /**
 632:    * Create an URI from the given components
 633:    *
 634:    * @param scheme The scheme name
 635:    * @param ssp The scheme specific part
 636:    * @param fragment The fragment
 637:    *
 638:    * @exception URISyntaxException If the given string violates RFC 2396
 639:    */
 640:   public URI(String scheme, String ssp, String fragment)
 641:     throws URISyntaxException
 642:   {
 643:     this((scheme == null ? "" : scheme + ":")
 644:          + (ssp == null ? "" : quote(ssp))
 645:          + (fragment == null ? "" : "#" + quote(fragment)));
 646:   }
 647: 
 648:   /**
 649:    * Create an URI from the given string
 650:    *
 651:    * @param str The string to create the URI from
 652:    *
 653:    * @exception IllegalArgumentException If the given string violates RFC 2396
 654:    * @exception NullPointerException If str is null
 655:    */
 656:   public static URI create(String str)
 657:   {
 658:     try
 659:       {
 660:     return new URI(str);
 661:       }
 662:     catch (URISyntaxException e)
 663:       {
 664:     throw (IllegalArgumentException) new IllegalArgumentException()
 665:           .initCause(e);
 666:       }
 667:   }
 668: 
 669:   /**
 670:    * Attempts to parse this URI's authority component, if defined,
 671:    * into user-information, host, and port components.  The purpose
 672:    * of this method was to disambiguate between some authority sections,
 673:    * which form invalid server-based authories, but valid registry
 674:    * based authorities.  In the updated RFC 3986, the authority section
 675:    * is defined differently, with registry-based authorities part of
 676:    * the host section.  Thus, this method is now simply an explicit
 677:    * way of parsing any authority section.
 678:    *
 679:    * @return the URI, with the authority section parsed into user
 680:    *         information, host and port components.
 681:    * @throws URISyntaxException if the given string violates RFC 2396
 682:    */
 683:   public URI parseServerAuthority() throws URISyntaxException
 684:   {
 685:     if (rawAuthority != null)
 686:       {
 687:     Matcher matcher = AUTHORITY_PATTERN.matcher(rawAuthority);
 688: 
 689:     if (matcher.matches())
 690:       {
 691:         rawUserInfo = getURIGroup(matcher, AUTHORITY_USERINFO_GROUP);
 692:         rawHost = getURIGroup(matcher, AUTHORITY_HOST_GROUP);
 693:         
 694:         String portStr = getURIGroup(matcher, AUTHORITY_PORT_GROUP);
 695:         
 696:         if (portStr != null && ! portStr.isEmpty())
 697:           try
 698:         {
 699:           port = Integer.parseInt(portStr);
 700:         }
 701:           catch (NumberFormatException e)
 702:         {
 703:           URISyntaxException use =
 704:             new URISyntaxException
 705:               (string, "doesn't match URI regular expression");
 706:           use.initCause(e);
 707:           throw use;
 708:         }
 709:       }
 710:     else
 711:       throw new URISyntaxException(string,
 712:                        "doesn't match URI regular expression");
 713:       }
 714:     return this;
 715:   }
 716: 
 717:   /**
 718:    * <p>
 719:    * Returns a normalized version of the URI.  If the URI is opaque,
 720:    * or its path is already in normal form, then this URI is simply
 721:    * returned.  Otherwise, the following transformation of the path
 722:    * element takes place:
 723:    * </p>
 724:    * <ol>
 725:    * <li>All `.' segments are removed.</li>
 726:    * <li>Each `..' segment which can be paired with a prior non-`..' segment
 727:    * is removed along with the preceding segment.</li>
 728:    * <li>A `.' segment is added to the front if the first segment contains
 729:    * a colon (`:').  This is a deviation from the RFC, which prevents
 730:    * confusion between the path and the scheme.</li>
 731:    * </ol>
 732:    * <p>
 733:    * The resulting URI will be free of `.' and `..' segments, barring those
 734:    * that were prepended or which couldn't be paired, respectively.
 735:    * </p>
 736:    *
 737:    * @return the normalized URI.
 738:    */
 739:   public URI normalize()
 740:   {
 741:     if (isOpaque() || path.indexOf("/./") == -1 && path.indexOf("/../") == -1)
 742:       return this;
 743:     try
 744:       {
 745:     return new URI(scheme, authority, normalizePath(path), query,
 746:                fragment);
 747:       }
 748:     catch (URISyntaxException e)
 749:       {
 750:     throw (Error) new InternalError("Normalized URI variant could not "+
 751:                     "be constructed").initCause(e);
 752:       }
 753:   }
 754: 
 755:   /**
 756:    * <p>
 757:    * Normalize the given path.  The following transformation takes place:
 758:    * </p>
 759:    * <ol>
 760:    * <li>All `.' segments are removed.</li>
 761:    * <li>Each `..' segment which can be paired with a prior non-`..' segment
 762:    * is removed along with the preceding segment.</li>
 763:    * <li>A `.' segment is added to the front if the first segment contains
 764:    * a colon (`:').  This is a deviation from the RFC, which prevents
 765:    * confusion between the path and the scheme.</li>
 766:    * </ol>
 767:    * <p>
 768:    * The resulting URI will be free of `.' and `..' segments, barring those
 769:    * that were prepended or which couldn't be paired, respectively.
 770:    * </p>
 771:    * 
 772:    * @param relativePath the relative path to be normalized.
 773:    * @return the normalized path.
 774:    */
 775:   private String normalizePath(String relativePath)
 776:   {
 777:     /* 
 778:        This follows the algorithm in section 5.2.4. of RFC3986,
 779:        but doesn't modify the input buffer.
 780:     */
 781:     StringBuffer input = new StringBuffer(relativePath);
 782:     StringBuffer output = new StringBuffer();
 783:     int start = 0;
 784:     while (start < input.length())
 785:       {
 786:     /* A */
 787:     if (input.indexOf("../",start) == start)
 788:       {
 789:         start += 3;
 790:         continue;
 791:       }
 792:     if (input.indexOf("./",start) == start)
 793:       {
 794:         start += 2;
 795:         continue;
 796:       }
 797:     /* B */
 798:     if (input.indexOf("/./",start) == start)
 799:       {
 800:         start += 2;
 801:         continue;
 802:       }
 803:     if (input.indexOf("/.",start) == start
 804:         && input.charAt(start + 2) != '.')
 805:       {
 806:         start += 1;
 807:         input.setCharAt(start,'/');
 808:         continue;
 809:       }
 810:     /* C */
 811:     if (input.indexOf("/../",start) == start)
 812:       {
 813:         start += 3;
 814:         removeLastSegment(output);
 815:         continue;
 816:       }
 817:     if (input.indexOf("/..",start) == start)
 818:       {
 819:         start += 2;
 820:         input.setCharAt(start,'/');
 821:         removeLastSegment(output);
 822:         continue;
 823:       }
 824:     /* D */
 825:     if (start == input.length() - 1 && input.indexOf(".",start) == start)
 826:       {
 827:         input.delete(0,1);
 828:         continue;
 829:       }
 830:     if (start == input.length() - 2 && input.indexOf("..",start) == start)
 831:       {
 832:         input.delete(0,2);
 833:         continue;
 834:       }
 835:     /* E */
 836:     int indexOfSlash = input.indexOf("/",start);
 837:     while (indexOfSlash == start)
 838:       {
 839:         output.append("/");
 840:         ++start;
 841:         indexOfSlash = input.indexOf("/",start);
 842:       }
 843:     if (indexOfSlash == -1)
 844:       indexOfSlash = input.length();
 845:     output.append(input.substring(start, indexOfSlash));
 846:         start = indexOfSlash;
 847:       }
 848:     return output.toString();
 849:   }
 850: 
 851:   /**
 852:    * Removes the last segment of the path from the specified buffer.
 853:    *
 854:    * @param buffer the buffer containing the path.
 855:    */
 856:   private void removeLastSegment(StringBuffer buffer)
 857:   {
 858:     int lastSlash = buffer.lastIndexOf("/");
 859:     if (lastSlash == -1)
 860:       buffer.setLength(0);
 861:<