Source for java.util.regex.Pattern

   1: /* Pattern.java -- Compiled regular expression ready to be applied.
   2:    Copyright (C) 2002, 2004, 2005, 2007 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: package java.util.regex;
  39: 
  40: import gnu.java.util.regex.RE;
  41: import gnu.java.util.regex.REException;
  42: import gnu.java.util.regex.RESyntax;
  43: 
  44: import java.io.Serializable;
  45: import java.util.ArrayList;
  46: 
  47: 
  48: /**
  49:  * Compiled regular expression ready to be applied. 
  50:  *
  51:  * @since 1.4
  52:  */
  53: public final class Pattern implements Serializable
  54: {
  55:   private static final long serialVersionUID = 5073258162644648461L;
  56:   
  57:   public static final int CANON_EQ = 128;
  58:   public static final int CASE_INSENSITIVE = 2;
  59:   public static final int COMMENTS = 4;
  60:   public static final int DOTALL = 32;
  61:   public static final int MULTILINE = 8;
  62:   public static final int UNICODE_CASE = 64;
  63:   public static final int UNIX_LINES = 1;
  64:   
  65:   private final String regex;
  66:   private final int flags;
  67: 
  68:   private final RE re;
  69: 
  70:   private Pattern (String regex, int flags)
  71:     throws PatternSyntaxException
  72:   {
  73:     this.regex = regex;
  74:     this.flags = flags;
  75: 
  76:     RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4;
  77:     int gnuFlags = 0;
  78:     gnuFlags |= RE.REG_ICASE_USASCII;
  79:     if ((flags & CASE_INSENSITIVE) != 0)
  80:       gnuFlags |= RE.REG_ICASE;
  81:     if ((flags & MULTILINE) != 0)
  82:       {
  83:         gnuFlags |= RE.REG_MULTILINE;
  84:         syntax = new RESyntax(syntax);
  85:         syntax.setLineSeparator(null);
  86:       }
  87:     if ((flags & DOTALL) != 0)
  88:       gnuFlags |= RE.REG_DOT_NEWLINE;
  89:     if ((flags & UNICODE_CASE) != 0)
  90:       gnuFlags &= ~RE.REG_ICASE_USASCII;
  91:     // not yet supported:
  92:     // if ((flags & CANON_EQ) != 0) gnuFlags =
  93: 
  94:     if ((flags & UNIX_LINES) != 0)
  95:       {
  96:     // Use a syntax set with \n for linefeeds?
  97:     syntax = new RESyntax(syntax);
  98:     syntax.setLineSeparator("\n");
  99:       }
 100: 
 101:     if ((flags & COMMENTS) != 0)
 102:       {
 103:     gnuFlags |= RE.REG_X_COMMENTS;
 104:       }
 105: 
 106:     try
 107:       {
 108:     this.re = new RE(regex, gnuFlags, syntax);
 109:       }
 110:     catch (REException e)
 111:       {
 112:     PatternSyntaxException pse;
 113:     pse = new PatternSyntaxException(e.getMessage(),
 114:                      regex, e.getPosition());
 115:     pse.initCause(e);
 116:     throw pse;
 117:       }
 118:   }
 119:  
 120:   // package private accessor method
 121:   RE getRE()
 122:   {
 123:     return re;
 124:   }
 125: 
 126:   /**
 127:    * @param regex The regular expression
 128:    *
 129:    * @exception PatternSyntaxException If the expression's syntax is invalid
 130:    */
 131:   public static Pattern compile (String regex)
 132:     throws PatternSyntaxException
 133:   {
 134:     return compile(regex, 0);
 135:   }
 136:   
 137:   /**
 138:    * @param regex The regular expression
 139:    * @param flags The match flags, a bit mask
 140:    *
 141:    * @exception PatternSyntaxException If the expression's syntax is invalid
 142:    * @exception IllegalArgumentException If bit values other than those
 143:    * corresponding to the defined match flags are set in flags
 144:    */
 145:   public static Pattern compile (String regex, int flags)
 146:     throws PatternSyntaxException
 147:   {
 148:     // FIXME: check which flags are really accepted
 149:     if ((flags & ~0xEF) != 0)
 150:       throw new IllegalArgumentException ();
 151:     
 152:     return new Pattern (regex, flags); 
 153:   }
 154:   
 155:   public int flags ()
 156:   {
 157:     return this.flags;
 158:   }
 159:   
 160:   /**
 161:    * @param regex The regular expression
 162:    * @param input The character sequence to be matched
 163:    *
 164:    * @exception PatternSyntaxException If the expression's syntax is invalid
 165:    */
 166:   public static boolean matches (String regex, CharSequence input) 
 167:   {
 168:     return compile(regex).matcher(input).matches();
 169:   }
 170:   
 171:   /**
 172:    * @param input The character sequence to be matched
 173:    */
 174:   public Matcher matcher (CharSequence input)
 175:   {
 176:     return new Matcher(this, input);
 177:   }
 178:   
 179:   /**
 180:    * @param input The character sequence to be matched
 181:    */
 182:   public String[] split (CharSequence input)
 183:   {
 184:     return split(input, 0);
 185:   }
 186:   
 187:   /**
 188:    * @param input The character sequence to be matched
 189:    * @param limit The result threshold
 190:    */
 191:   public String[] split (CharSequence input, int limit)
 192:   {
 193:     Matcher matcher = new Matcher(this, input);
 194:     ArrayList list = new ArrayList();
 195:     int empties = 0;
 196:     int count = 0;
 197:     int start = 0;
 198:     int end;
 199:     boolean matched = matcher.find();
 200: 
 201:     while (matched && (limit <= 0 || count < limit - 1))
 202:       {
 203:     ++count;
 204:     end = matcher.start();
 205:     if (start == end)
 206:       empties++;
 207:     else
 208:       {
 209:         while (empties > 0)
 210:           {
 211:         list.add("");
 212:         empties--;
 213:           }
 214: 
 215:         String text = input.subSequence(start, end).toString();
 216:         list.add(text);
 217:       }
 218:     start = matcher.end();
 219:     matched = matcher.find();
 220:       }
 221: 
 222:     // We matched nothing.
 223:     if (!matched && count == 0)
 224:       return new String[] { input.toString() };
 225:     
 226:     // Is the last token empty?
 227:     boolean emptyLast = (start == input.length());
 228: 
 229:     // Can/Must we add empties or an extra last token at the end?
 230:     if (list.size() < limit || limit < 0 || (limit == 0 && !emptyLast))
 231:       {
 232:     if (limit > list.size())
 233:       {
 234:         int max = limit - list.size();
 235:         empties = (empties > max) ? max : empties;
 236:       }
 237:     while (empties > 0)
 238:       {
 239:         list.add("");
 240:         empties--;
 241:       }
 242:       }
 243: 
 244:     // last token at end
 245:     if (limit != 0 || (limit == 0 && !emptyLast))
 246:       {
 247:     String t = input.subSequence(start, input.length()).toString();
 248:     if ("".equals(t) && limit == 0)
 249:       { /* Don't add. */ }
 250:     else
 251:       list.add(t);
 252:       }
 253: 
 254:     String[] output = new String [list.size()];
 255:     list.toArray(output);
 256:     return output;
 257:   }
 258:   
 259:   public String pattern ()
 260:   {
 261:     return regex;
 262:   }
 263: 
 264:   /**
 265:    * Return the regular expression used to construct this object.
 266:    * @specnote Prior to JDK 1.5 this method had a different behavior
 267:    * @since 1.5
 268:    */
 269:   public String toString()
 270:   {
 271:     return regex;
 272:   }
 273: }