GNU Classpath (0.95) | |
Frames | No Frames |
1: /* DocumentParser.java -- A parser for HTML documents. 2: Copyright (C) 2005 Free Software Foundation, Inc. 3: 4: This file is part of GNU Classpath. 5: 6: GNU Classpath is free software; you can redistribute it and/or modify 7: it under the terms of the GNU General Public License as published by 8: the Free Software Foundation; either version 2, or (at your option) 9: any later version. 10: 11: GNU Classpath is distributed in the hope that it will be useful, but 12: WITHOUT ANY WARRANTY; without even the implied warranty of 13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14: General Public License for more details. 15: 16: You should have received a copy of the GNU General Public License 17: along with GNU Classpath; see the file COPYING. If not, write to the 18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19: 02110-1301 USA. 20: 21: Linking this library statically or dynamically with other modules is 22: making a combined work based on this library. Thus, the terms and 23: conditions of the GNU General Public License cover the whole 24: combination. 25: 26: As a special exception, the copyright holders of this library give you 27: permission to link this library with independent modules to produce an 28: executable, regardless of the license terms of these independent 29: modules, and to copy and distribute the resulting executable under 30: terms of your choice, provided that you also meet, for each linked 31: independent module, the terms and conditions of the license of that 32: module. An independent module is a module which is not derived from 33: or based on this library. If you modify this library, you may extend 34: this exception to your version of the library, but you are not 35: obligated to do so. If you do not wish to do so, delete this 36: exception statement from your version. */ 37: 38: 39: package javax.swing.text.html.parser; 40: 41: import javax.swing.text.html.parser.Parser; 42: 43: import java.io.IOException; 44: import java.io.Reader; 45: 46: import javax.swing.text.BadLocationException; 47: import javax.swing.text.SimpleAttributeSet; 48: import javax.swing.text.html.HTMLEditorKit; 49: 50: /** 51: * <p>A simple error-tolerant HTML parser that uses a DTD document 52: * to access data on the possible tokens, arguments and syntax.</p> 53: * <p> The parser reads an HTML content from a Reader and calls various 54: * notifying methods (which should be overridden in a subclass) 55: * when tags or data are encountered.</p> 56: * <p>Some HTML elements need no opening or closing tags. The 57: * task of this parser is to invoke the tag handling methods also when 58: * the tags are not explicitly specified and must be supposed using 59: * information, stored in the DTD. 60: * For example, parsing the document 61: * <p><table><tr><td>a<td>b<td>c</tr> <br> 62: * will invoke exactly the handling methods exactly in the same order 63: * (and with the same parameters) as if parsing the document: <br> 64: * <em><html><head></head><body><table>< 65: * tbody></em><tr><td>a<em></td></em><td>b<em> 66: * </td></em><td>c<em></td></tr></em>< 67: * <em>/tbody></table></body></html></em></p> 68: * (supposed tags are given in italics). The parser also supports 69: * obsolete elements of HTML syntax.<p> 70: * </p> 71: * In this implementation, DocumentParser is directly derived from its 72: * ancestor without changes of functionality. 73: * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) 74: */ 75: public class DocumentParser 76: extends Parser 77: implements DTDConstants 78: { 79: /** 80: * The enclosed working parser class. 81: */ 82: private class gnuParser 83: extends gnu.javax.swing.text.html.parser.support.Parser 84: { 85: private gnuParser(DTD d) 86: { 87: super(d); 88: } 89: 90: protected final void handleComment(char[] comment) 91: { 92: parser.handleComment(comment); 93: callBack.handleComment(comment, hTag.where.startPosition); 94: } 95: 96: protected final void handleEmptyTag(TagElement tag) 97: throws javax.swing.text.ChangedCharSetException 98: { 99: parser.handleEmptyTag(tag); 100: callBack.handleSimpleTag(tag.getHTMLTag(), getAttributes(), 101: hTag.where.startPosition 102: ); 103: } 104: 105: protected final void handleEndTag(TagElement tag) 106: { 107: parser.handleEndTag(tag); 108: callBack.handleEndTag(tag.getHTMLTag(), hTag.where.startPosition); 109: } 110: 111: protected final void handleError(int line, String message) 112: { 113: parser.handleError(line, message); 114: callBack.handleError(message, hTag.where.startPosition); 115: } 116: 117: protected final void handleStartTag(TagElement tag) 118: { 119: parser.handleStartTag(tag); 120: SimpleAttributeSet attributes = gnu.getAttributes(); 121: 122: if (tag.fictional()) 123: attributes.addAttribute(HTMLEditorKit.ParserCallback.IMPLIED, 124: Boolean.TRUE 125: ); 126: 127: callBack.handleStartTag(tag.getHTMLTag(), attributes, 128: hTag.where.startPosition 129: ); 130: } 131: 132: protected final void handleText(char[] text) 133: { 134: parser.handleText(text); 135: callBack.handleText(text, hTag.where.startPosition); 136: } 137: 138: DTD getDTD() 139: { 140: return dtd; 141: } 142: } 143: 144: /** 145: * This field is used to access the identically named 146: * methods of the outer class. 147: * This is package-private to avoid an accessor method. 148: */ 149: DocumentParser parser = this; 150: 151: /** 152: * The callback. 153: * This is package-private to avoid an accessor method. 154: */ 155: HTMLEditorKit.ParserCallback callBack; 156: 157: /** 158: * The reference to the working class of HTML parser that is 159: * actually used to parse the document. 160: * This is package-private to avoid an accessor method. 161: */ 162: gnuParser gnu; 163: 164: /** 165: * Creates a new parser that uses the given DTD to access data on the 166: * possible tokens, arguments and syntax. There is no single - step way 167: * to get a default DTD; you must either refer to the implementation - 168: * specific packages, write your own DTD or obtain the working instance 169: * of parser in other way, for example, by calling 170: * {@link javax.swing.text.html.HTMLEditorKit#getParser()}. 171: * 172: * @param a_dtd a DTD to use. 173: */ 174: public DocumentParser(DTD a_dtd) 175: { 176: super(a_dtd); 177: gnu = new gnuParser(a_dtd); 178: } 179: 180: /** 181: * Parses the HTML document, calling methods of the provided 182: * callback. This method must be multithread - safe. 183: * @param reader The reader to read the HTML document from 184: * @param aCallback The callback that is notifyed about the presence 185: * of HTML elements in the document. 186: * @param ignoreCharSet If thrue, any charset changes during parsing 187: * are ignored. 188: * @throws java.io.IOException 189: */ 190: public void parse(Reader reader, HTMLEditorKit.ParserCallback aCallback, 191: boolean ignoreCharSet 192: ) 193: throws IOException 194: { 195: callBack = aCallback; 196: gnu.parse(reader); 197: 198: callBack.handleEndOfLineString(gnu.getEndOfLineSequence()); 199: try 200: { 201: callBack.flush(); 202: } 203: catch (BadLocationException ex) 204: { 205: // Convert this into the supported type of exception. 206: throw new IOException(ex.getMessage()); 207: } 208: } 209: 210: /** 211: * Handle HTML comment. The default method returns without action. 212: * @param comment the comment being handled 213: */ 214: protected void handleComment(char[] comment) 215: { 216: // This default implementation does nothing. 217: } 218: 219: /** 220: * Handle the tag with no content, like <br>. The method is 221: * called for the elements that, in accordance with the current DTD, 222: * has an empty content. 223: * @param tag the tag being handled. 224: * @throws javax.swing.text.ChangedCharSetException 225: */ 226: protected void handleEmptyTag(TagElement tag) 227: throws javax.swing.text.ChangedCharSetException 228: { 229: // This default implementation does nothing. 230: } 231: 232: /** 233: * The method is called when the HTML closing tag ((like </table>) 234: * is found or if the parser concludes that the one should be present 235: * in the current position. 236: * @param tag The tag being handled 237: */ 238: protected void handleEndTag(TagElement tag) 239: { 240: // This default implementation does nothing. 241: } 242: 243: /* Handle error that has occured in the given line. */ 244: protected void handleError(int line, String message) 245: { 246: // This default implementation does nothing. 247: } 248: 249: /** 250: * The method is called when the HTML opening tag ((like <table>) 251: * is found or if the parser concludes that the one should be present 252: * in the current position. 253: * @param tag The tag being handled 254: */ 255: protected void handleStartTag(TagElement tag) 256: { 257: // This default implementation does nothing. 258: } 259: 260: /** 261: * Handle the text section. 262: * @param text a section text. 263: */ 264: protected void handleText(char[] text) 265: { 266: // This default implementation does nothing. 267: } 268: }
GNU Classpath (0.95) |