Source for javax.swing.text.html.parser.DTD

   1: /* DTD.java --
   2:    Copyright (C) 2005 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package javax.swing.text.html.parser;
  40: 
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.BitSet;
import java.util.Hashtable;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.Vector;
  51: 
  52: /**
  53:  * <p>Representation or the SGML DTD document.
  54:  * Provides basis for describing a syntax of the
  55:  * HTML documents. The fields of this class are NOT initialized in
  56:  * constructor. You need to do this separately before passing this data
  57:  * structure to the HTML parser. The subclasses with the fields, pre-
  58:  * initialized, for example, for HTML 4.01, can be available only between
  59:  * the implementation specific classes
  60:  * ( for example, {@link gnu.javax.swing.text.html.parser.HTML_401F }
  61:  * in this implementation).</p>
  62:  * <p>
  63:  * If you need more information about SGML DTD documents,
  64:  * the author suggests to read SGML tutorial on
  65:  * <a href="http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html"
  66:  * >http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html</a>.
  67:  * We also recommend Goldfarb C.F (1991) <i>The SGML Handbook</i>,
  68:  * Oxford University Press, 688 p, ISBN: 0198537379.
  69:  * </p>
  70:  * <p>
  71:  * Warning: the html, head and other tag fields will only be automatically
  72:  * assigned if the VM has the correctly implemented reflection mechanism.
  73:  * As these fields are not used anywhere in the implementation, not
  74:  * exception will be thrown in the opposite case.
  75:  * </p>
  76:  *
  77:  * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
  78:  */
  79: public class DTD
  80:   implements DTDConstants
  81: {
  82:   /**
  83:    * The version of the persistent data format.
  84:    * @specnote This was made <code>final</code> in 1.5.
  85:    */
  86:   public static final int FILE_VERSION = 1;
  87: 
  88:   /**
  89:    * The table of existing available DTDs.
  90:    */
  91:   static Hashtable<String,DTD> dtdHash = new Hashtable<String,DTD>();
  92: 
  93:   /**
  94:    * The applet element for this DTD.
  95:    */
  96:   public Element applet;
  97: 
  98:   /**
  99:    * The base element for this DTD.
 100:    */
 101:   public Element base;
 102: 
 103:   /**
 104:    * The body element for this DTD.
 105:    */
 106:   public Element body;
 107: 
 108:   /**
 109:    * The head element for this DTD.
 110:    */
 111:   public Element head;
 112: 
 113:   /**
 114:    * The html element for this DTD.
 115:    */
 116:   public Element html;
 117: 
 118:   /**
 119:    * The isindex element of for this DTD.
 120:    */
 121:   public Element isindex;
 122: 
 123:   /**
 124:    * The meta element for this DTD.
 125:    */
 126:   public Element meta;
 127: 
 128:   /**
 129:    * The p element for this DTD.
 130:    */
 131:   public Element p;
 132: 
 133:   /**
 134:    * The param element for this DTD.
 135:    */
 136:   public Element param;
 137: 
 138:   /**
 139:    * The pcdata for this DTD.
 140:    */
 141:   public Element pcdata;
 142: 
 143:   /**
 144:    * The title element for this DTD.
 145:    */
 146:   public Element title;
 147: 
 148:   /**
 149:    * The element for accessing all DTD elements by name.
 150:    */
 151:   public Hashtable<String,Element> elementHash =
 152:     new Hashtable<String,Element>();
 153: 
 154:   /**
 155:    * The entity table for accessing all DTD entities by name.
 156:    */
 157:   public Hashtable<Object, Entity> entityHash = new Hashtable<Object, Entity>();
 158: 
 159:   /**
 160:    *  The name of this DTD.
 161:    */
 162:   public String name;
 163: 
 164:   /**
 165:    * Contains all elements in this DTD. The
 166:    * javax.swing.text.html.parser.Element#index field of all elements
 167:    * in this vector is set to the element position in this vector.
 168:    */
 169:   public Vector<Element> elements = new Vector<Element>();
 170: 
 171:   /** Create a new DTD with the specified name. */
 172:   protected DTD(String a_name)
 173:   {
 174:     name = a_name;
 175:   }
 176: 
 177:   /** Get this DTD by name. The current implementation
 178:    * only looks in the internal table of DTD documents. If no corresponding
 179:    * entry is found, the new entry is created, placed into
 180:    * the table and returned. */
 181:   public static DTD getDTD(String name)
 182:                     throws IOException
 183:   {
 184:     DTD d = (DTD) dtdHash.get(name);
 185: 
 186:     if (d == null)
 187:       {
 188:         d = new DTD(name);
 189:         dtdHash.put(d.name, d);
 190:       }
 191: 
 192:     return d;
 193:   }
 194: 
 195:   /**
 196:    * Get the element by the element name. If the element is not yet
 197:    * defined, it is newly created and placed into the element table.
 198:    * If the element name matches (ingoring case) a public non static
 199:    * element field in this class, this field is assigned to the value
 200:    * of the newly created element.
 201:    */
 202:   public Element getElement(String element_name)
 203:   {
 204:     return newElement(element_name);
 205:   }
 206: 
 207:   /**
 208:    * Get the element by the value of its
 209:    * {@link javax.swing.text.html.parser.Element#index} field.
 210:    */
 211:   public Element getElement(int index)
 212:   {
 213:     return (Element) elements.get(index);
 214:   }
 215: 
 216:   /**
 217:    * Get the entity with the given identifier.
 218:    * @param id that can be returned by
 219:    * {@link javax.swing.text.html.parser.Entity#name2type(String an_entity)}
 220:    * @return The entity from this DTD or null if there is no entity with
 221:    * such id or such entity is not present in the table of this instance.
 222:    */
 223:   public Entity getEntity(int id)
 224:   {
 225:     String name = Entity.mapper.get(id);
 226: 
 227:     if (name != null)
 228:       return entityHash.get(name);
 229:     else
 230:       return null;
 231:   }
 232: 
 233:   /**
 234:    * Get the named entity by its name.
 235:    */
 236:   public Entity getEntity(String entity_name)
 237:   {
 238:     return (Entity) entityHash.get(entity_name);
 239:   }
 240: 
 241:   /**
 242:    * Get the name of this instance of DTD
 243:    */
 244:   public String getName()
 245:   {
 246:     return name;
 247:   }
 248: 
 249:   /**
 250:    * Creates, adds into the entity table and returns the
 251:    * character entity like <code>&amp;lt;</code>
 252:    *  (means '<code>&lt;</code>' );
 253:    * @param name The entity name (without heading &amp; and closing ;)
 254:    * @param type The entity type
 255:    * @param character The entity value (single character)
 256:    * @return The created entity
 257:    */
 258:   public Entity defEntity(String name, int type, int character)
 259:   {
 260:     Entity e = newEntity(name, type);
 261:     e.data = new char[] { (char) character };
 262:     return e;
 263:   }
 264: 
 265:   /**
 266:    * Define the attributes for the element with the given name.
 267:    * If the element is not exist, it is created.
 268:    * @param forElement
 269:    * @param attributes
 270:    */
 271:   public void defineAttributes(String forElement, AttributeList attributes)
 272:   {
 273:     Element e = elementHash.get(forElement.toLowerCase());
 274: 
 275:     if (e == null)
 276:       e = newElement(forElement);
 277: 
 278:     e.atts = attributes;
 279:   }
 280: 
 281:   /**
 282:    * Defines the element and adds it to the element table. Sets the
 283:    * <code>Element.index</code> field to the value, unique for this
 284:    * instance of DTD. If the element with the given name already exists,
 285:    * replaces all other its settings by the method argument values.
 286:    * @param name the name of the element
 287:    * @param type the type of the element
 288:    * @param headless true if the element needs no starting tag
 289:    * (should not occur in HTML).
 290:    * @param tailless true if the element needs no ending tag (like
 291:    * <code>&lt;hr&gt;</code>
 292:    * @param content the element content
 293:    * @param exclusions the set of elements that must not occur inside
 294:    * this element. The <code>Element.index</code> value defines which
 295:    * bit in this bitset corresponds to that element.
 296:    * @param inclusions the set of elements that can occur inside this
 297:    * element. the <code>Element.index</code> value defines which
 298:    * bit in this bitset corresponds to that element.
 299:    * @param attributes the element attributes.
 300:    * @return the newly defined element.
 301:    */
 302:   public Element defineElement(String name, int type, boolean headless,
 303:                                boolean tailless, ContentModel content,
 304:                                BitSet exclusions, BitSet inclusions,
 305:                                AttributeList attributes
 306:                               )
 307:   {
 308:     Element e = newElement(name);
 309:     e.type = type;
 310:     e.oStart = headless;
 311:     e.oEnd = tailless;
 312:     e.content = content;
 313:     e.exclusions = exclusions;
 314:     e.inclusions = inclusions;
 315:     e.atts = attributes;
 316: 
 317:     return e;
 318:   }
 319: 
 320:   /**
 321:    * Creates, intializes and adds to the entity table the new
 322:    * entity.
 323:    * @param name the name of the entity
 324:    * @param type the type of the entity
 325:    * @param data the data section of the entity
 326:    * @return the created entity
 327:    */
 328:   public Entity defineEntity(String name, int type, char[] data)
 329:   {
 330:     Entity e = newEntity(name, type);
 331:     e.data = data;
 332: 
 333:     return e;
 334:   }
 335: 
 336:   /** Place this DTD into the DTD table. */
 337:   public static void putDTDHash(String name, DTD dtd)
 338:   {
 339:     dtdHash.put(name, dtd);
 340:   }
 341: 
 342:   /**
 343:    * <p>Reads DTD from an archived format. This format is not standardized
 344:    * and differs between implementations.</p><p> This implementation
 345:    * reads and defines all entities and elements using
 346:    * ObjectInputStream. The elements and entities can be written into the
 347:    * stream in any order. The objects other than elements and entities
 348:    * are ignored.</p>
 349:    * @param stream A data stream to read from.
 350:    * @throws java.io.IOException If one is thrown by the input stream
 351:    */
 352:   public void read(DataInputStream stream)
 353:             throws java.io.IOException
 354:   {
 355:     ObjectInputStream oi = new ObjectInputStream(stream);
 356:     Object def;
 357:     try
 358:       {
 359:         while (true)
 360:           {
 361:             def = oi.readObject();
 362:             if (def instanceof Element)
 363:               {
 364:                 Element e = (Element) def;
 365:                 elementHash.put(e.name.toLowerCase(), e);
 366:                 assignField(e);
 367:               }
 368:             else if (def instanceof Entity)
 369:               {
 370:                 Entity e = (Entity) def;
 371:                 entityHash.put(e.name, e);
 372:               }
 373:           }
 374:       }
 375:     catch (ClassNotFoundException ex)
 376:       {
 377:         throw new IOException(ex.getMessage());
 378:       }
 379:     catch (EOFException ex)
 380:       {
 381:         // ok EOF
 382:       }
 383:   }
 384: 
 385:   /**
 386:    * Returns the name of this instance of DTD.
 387:    */
 388:   public String toString()
 389:   {
 390:     return name;
 391:   }
 392: 
 393:   /**
 394:    * Creates and returns new attribute (not an attribute list).
 395:    * @param name the name of this attribute
 396:    * @param type the type of this attribute (FIXED, IMPLIED or
 397:    * REQUIRED from <code>DTDConstants</code>).
 398:    * @param modifier the modifier of this attribute
 399:    * @param default_value the default value of this attribute
 400:    * @param allowed_values the allowed values of this attribute. The multiple
 401:    * possible values in this parameter are supposed to be separated by
 402:    * '|', same as in SGML DTD <code>&lt;!ATTLIST </code>tag. This parameter
 403:    * can be null if no list of allowed values is specified.
 404:    * @param atts the previous attribute of this element. This is
 405:    * placed to the field
 406:    * {@link javax.swing.text.html.parser.AttributeList#next },
 407:    * creating a linked list.
 408:    * @return The attributes.
 409:    */
 410:   protected AttributeList defAttributeList(String name, int type, int modifier,
 411:                                            String default_value,
 412:                                            String allowed_values,
 413:                                            AttributeList atts
 414:                                           )
 415:   {
 416:     AttributeList al = new AttributeList(name);
 417:     al.modifier = modifier;
 418:     al.value = default_value;
 419:     al.next = atts;
 420: 
 421:     if (allowed_values != null)
 422:       {
 423:         StringTokenizer st = new StringTokenizer(allowed_values, " \t|");
 424:         Vector<String> v = new Vector<String>(st.countTokens());
 425: 
 426:         while (st.hasMoreTokens())
 427:           v.add(st.nextToken());
 428: 
 429:         al.values = v;
 430:       }
 431: 
 432:     return al;
 433:   }
 434: 
 435:   /**
 436:    * Creates a new content model.
 437:    * @param type specifies the BNF operation for this content model.
 438:    * The valid operations are documented in the
 439:    * {@link javax.swing.text.html.parser.ContentModel#type }.
 440:    * @param content the content of this content model
 441:    * @param next if the content model is specified by BNF-like
 442:    * expression, contains the rest of this expression.
 443:    * @return The newly created content model.
 444:    */
 445:   protected ContentModel defContentModel(int type, Object content,
 446:                                          ContentModel next
 447:                                         )
 448:   {
 449:     ContentModel model = new ContentModel();
 450:     model.type = type;
 451:     model.next = next;
 452:     model.content = content;
 453: 
 454:     return model;
 455:   }
 456: 
 457:   /**
 458:    * Defines a new element and adds it to the element table.
 459:    * If the element alredy exists,
 460:    * overrides it settings with the specified values.
 461:    * @param name the name of the new element
 462:    * @param type the type of the element
 463:    * @param headless true if the element needs no starting tag
 464:    * @param tailless true if the element needs no closing tag
 465:    * @param content the element content.
 466:    * @param exclusions the elements that must be excluded from the
 467:    * content of this element, in all levels of the hierarchy.
 468:    * @param inclusions the elements that can be included as the
 469:    * content of this element.
 470:    * @param attributes the element attributes.
 471:    * @return the created or updated element.
 472:    */
 473:   protected Element defElement(String name, int type, boolean headless,
 474:                                boolean tailless, ContentModel content,
 475:                                String[] exclusions, String[] inclusions,
 476:                                AttributeList attributes
 477:                               )
 478:   {
 479:     // compute the bit sets
 480:     BitSet exclude = bitSet(exclusions);
 481:     BitSet include = bitSet(inclusions);
 482: 
 483:     Element e =
 484:       defineElement(name, type, headless, tailless, content, exclude, include,
 485:                     attributes
 486:                    );
 487: 
 488:     return e;
 489:   }
 490: 
 491:   /**
 492:    * Creates, intializes and adds to the entity table the new
 493:    * entity.
 494:    * @param name the name of the entity
 495:    * @param type the type of the entity
 496:    * @param data the data section of the entity
 497:    * @return the created entity
 498:    */
 499:   protected Entity defEntity(String name, int type, String data)
 500:   {
 501:     Entity e = newEntity(name, type);
 502:     e.data = data.toCharArray();
 503: 
 504:     return e;
 505:   }
 506: 
 507:   private void assignField(Element e)
 508:   {
 509:     String element_name = e.name;
 510:     try
 511:       {
 512:         // Assign the field via reflection.
 513:         Field f = getClass().getField(element_name.toLowerCase());
 514:         if ((f.getModifiers() & Modifier.PUBLIC) != 0)
 515:           if ((f.getModifiers() & Modifier.STATIC) == 0)
 516:             if (f.getType().isAssignableFrom(e.getClass()))
 517:               f.set(this, e);
 518:       }
 519:     catch (IllegalAccessException ex)
 520:       {
 521:         unexpected(ex);
 522:       }
 523:     catch (NoSuchFieldException ex)
 524:       {
 525:         // This is ok.
 526:       }
 527: 
 528:     // Some virtual machines may still lack the proper
 529:     // implementation of reflection. As the tag fields
 530:     // are not used anywhere in this implementation,
 531:     // (and this class is also rarely used by the end user),
 532:     // it may be better not to crash everything by throwing an error
 533:     // for each case when the HTML parsing is required.
 534:     catch (Throwable t)
 535:       {
 536:         // This VM has no reflection mechanism implemented!
 537:         if (t instanceof OutOfMemoryError)
 538:           throw (Error) t;
 539:       }
 540:   }
 541: 
 542:   /**
 543:    * Create the bit set for this array of elements.
 544:    * The unknown elements are automatically defined and added
 545:    * to the element table.
 546:    * @param elements
 547:    * @return The bit set.
 548:    */
 549:   private BitSet bitSet(String[] elements)
 550:   {
 551:     BitSet b = new BitSet();
 552: 
 553:     for (int i = 0; i < elements.length; i++)
 554:       {
 555:         Element e = getElement(elements [ i ]);
 556: 
 557:         if (e == null)
 558:           e = newElement(elements [ i ]);
 559: 
 560:         b.set(e.index);
 561:       }
 562: 
 563:     return b;
 564:   }
 565: 
 566:   /**
 567:    * Find the element with the given name in the element table.
 568:    * If not find, create a new element with this name and add to the
 569:    * table.
 570:    * @param name the name of the element
 571:    * @return the found or created element.
 572:    */
 573:   private Element newElement(String name)
 574:   {
 575:     Element e = elementHash.get(name.toLowerCase());
 576: 
 577:     if (e == null)
 578:       {
 579:         e = new Element();
 580:         e.name = name;
 581:         e.index = elements.size();
 582:         elements.add(e);
 583:         elementHash.put(e.name.toLowerCase(), e);
 584:         assignField(e);
 585:       }
 586:     return e;
 587:   }
 588: 
 589:   /**
 590:    * Creates and adds to the element table the entity with an
 591:    * unitialized data section. Used internally.
 592:    * @param name the name of the entity
 593:    * @param type the type of the entity, a bitwise combination
 594:    * of GENERAL, PARAMETER, SYSTEM and PUBLIC.
 595:    *
 596:    * @return the created entity
 597:    */
 598:   private Entity newEntity(String name, int type)
 599:   {
 600:     Entity e = new Entity(name, type, null);
 601:     entityHash.put(e.name, e);
 602:     return e;
 603:   }
 604: 
 605:   private void unexpected(Exception ex)
 606:   {
 607:     throw new Error("This should never happen, report a bug", ex);
 608:   }
 609: }