View Javadoc

1   package hotsax.html.sax;
2   
3   import org.xml.sax.*;
4   import org.xml.sax.helpers.*;
5   import org.xml.sax.ext.*;
6   
7   import java.io.*;
8   import java.util.*;
9   
10  
11  
12  /***
13   * SaxHandlerDelegate - provides a clean interface between the
14   *   Byacc/J generated HtmlParse and the SaxParser.
15   */
16  
17  public class SaxHandlerDelegate implements ParserDelegate {
18  
19  	private HtmlParser parser = null;
20  	private XMLReader reader = null;
21  	private ContentHandler contentHandler = null;
22  	private LexicalHandler lexicalHandler = null; 		// this one my not exist for Sax parser/Sax client combo
23  
24  	private org.xml.sax.helpers.AttributesImpl attrList;   // collect attributes in a list
25  
26  
27  	public SaxHandlerDelegate(HtmlParser HtmlParser) {
28  		this.parser = parser;
29  		attrList = new org.xml.sax.helpers.AttributesImpl();
30  	}
31  
32  
33  	// ContentHandler interface methods.
34  	// If any of these fire a SAXException, it is reported to parser.yyerror()
35  
36  	/***
37  	 * Parse a startDocument event and pass it to the resigtered content handler.
38  	 * This method fires in response to a HtmlParser.EOF lexer token beging recognised.
39  	 * SOF is a virtual token fired as the first event after the file is opened.
40  	 */
41  	public void startDocument() 
42          {
43  		try {
44  		    if (contentHandler != null)
45  			contentHandler.startDocument();	
46                          
47  		}
48  		catch (SAXException ex)
49  		{
50  		    parser.yyerror(ex.getMessage());
51  		}
52  	}
53  
54  
55        /*** 
56          * Parse a PI and pass it to the contentHandler event
57          *  (does not pass xml declaration:  <?xml version = 1>)
58  		* Separates the target from the data by using whitespace.
59          * 
60          */ 
61  	public void processingInstruction(HtmlParserVal target, HtmlParserVal lval)
62  	{
63  		try {
64              if (contentHandler != null) {
65                  StringTokenizer stok = new StringTokenizer(lval.sval);  // default delim = \sp
66                  
67                  if (stok.hasMoreElements())
68                  {
69                      String data;
70                      if (stok.hasMoreElements())
71                          data = stok.nextToken();
72                      else
73                          data = "";
74                      if (!target.equals("xml"))
75                          contentHandler.processingInstruction(target.toString(), data);
76                  }
77              }
78  		}
79  		catch (SAXException ex)
80  		{
81  			parser.yyerror(ex.getMessage());
82  		}
83  	}
84  
85  	/***
86    	 * Initialize the start of a start element. Prepares the attribute list
87  	 * to collect any attributes.
88  	 */
89  	public void startElement() 
90  	{
91  		attrList.clear();
92  	}
93  
94  	/***
95            * Adds an attribute to the list. The name of the attribute is normalized
96            * to lowercase 
97  	  */
98  	public void addAttribute(String name, String value) {
99  		attrList.addAttribute("", "", name, "NMTOKEN", value);
100 	}
101 
102 	public HtmlParserVal getAttributes() {
103 		HtmlParserVal aList = new HtmlParserVal(attrList);
104 		return aList;
105 	}
106 
107 	public void startElement(HtmlParserVal lval, HtmlParserVal attrList) {
108 		try {
109 			if (contentHandler != null)
110 			{
111 				 contentHandler.startElement("", lval.sval, "", (Attributes)attrList.obj);
112 			}
113 		}
114 		catch (SAXException ex)
115 		{
116 			parser.yyerror(ex.getMessage());
117 		}
118 	}
119 
120 	/***
121 	 * Fire startElement event. Note handled the actual beginning of the element by now
122 	 *  and have collected all attributes (if any)
123 	 */
124 	public void startElement(HtmlParserVal lval) {
125             try {
126                 if (contentHandler != null)
127                 {
128                     contentHandler.startElement("", lval.sval, "", attrList);
129                 }
130             }
131             catch (SAXException ex)
132             {
133                 parser.yyerror(ex.getMessage());
134             }
135 	}
136 
137 
138 	/***
139      * collect characters from parse stream. Unwrap the HtmlParserVal.sval 
140 	 * String to a character array. 
141      * TODO: After creating a LexicalHandler, make sure this gets called
142 	 *       in the comment state.
143      * TODO: This might be better done in the collection process
144      *   rather than always using a String. I.e. getting a bunch of chars instead of
145      *   incrementally appending one char at a time from yytext() 
146      */
147 	public void characters(HtmlParserVal lval) 
148 	{
149             try {
150                 if (contentHandler != null) // first unwrap to wrap later? for speed?
151                 {
152                     char ch[] = lval.sval.toCharArray();		
153                     contentHandler.characters(ch, 0, lval.sval.length());
154                 }
155             }
156             catch (SAXException ex)
157             {
158                     parser.yyerror(ex.getMessage());
159             }
160 	}
161 
162 
163 	/***
164      *  Fire endElement event. The name of the element is passed to the event handler.
165 	 *   Note these might be optionally missing in the HTML case.
166      */
167 	public void endElement(HtmlParserVal lval) 
168 	{
169             try {
170                 if (contentHandler != null)
171                     contentHandler.endElement("", lval.sval, "");
172             }
173             catch (SAXException ex)
174             {
175                 parser.yyerror(ex.getMessage());
176             }
177     }
178 
179 	/***
180 	 * Fire endDocument event.
181 	 */
182 	public void endDocument() 
183 	{
184             try {
185                 if (contentHandler != null)
186                     contentHandler.endDocument();
187             }
188             catch (SAXException ex)
189             {
190                 parser.yyerror(ex.getMessage());
191             }
192 	}
193 
194 	// LexicalHandler interface functions.
195 
196 	/***
197 	 * comment handler
198 	 * Note, these are delegate to the XMLReader's LexicalHandler if any
199 	 * TODO:  Check the property of the reader for its existance.
200 	 */
201 	public void comment(HtmlParserVal lval) {
202             try {
203                 if (lexicalHandler != null)
204                 {
205                     char ch[] = lval.sval.toCharArray();		
206                     lexicalHandler.comment(ch, 0, lval.sval.length());
207                 }
208             }
209             catch (SAXException ex)
210             {
211                 parser.yyerror(ex.getMessage());
212             }
213 	}
214 
215 
216 	/***
217 	 * CDATA handler
218 	 * Note, these are delegate to the XMLReader's LexicalHandler if any
219 	 * 	This only marks the start boundary condition. Text still goes through characters()
220 	 */
221 	public void startCDATA() {
222             try {
223                 if (lexicalHandler != null)
224                 {
225                     lexicalHandler.startCDATA();
226                 }
227             }
228             catch (SAXException ex)
229             {
230                     parser.yyerror(ex.getMessage());
231             }
232 	}
233 
234 	/***
235 	 * CDATA handler
236 	 * Note, these are delegate to the XMLReader's LexicalHandler if any
237 	 * 	This only marks the end boundary of the CDATA section. Text still goes through characters()
238 	 */
239 	public void endCDATA() {
240             try {
241                 if (lexicalHandler != null)
242                 {
243                     lexicalHandler.endCDATA();
244                 }
245             }
246             catch (SAXException ex)
247             {
248                     parser.yyerror(ex.getMessage());
249             }
250 	}
251 
252 	/***
253 	 * Start the beginning of the DOCTYPE (DTD) declaration
254 	 * Note, these are delegate to the XMLReader's LexicalHandler if any
255 	 */
256 	public void startDTD(HtmlParserVal lval) {
257             try {
258                if (lexicalHandler != null)
259                 {
260                     StringTokenizer stok = new StringTokenizer(lval.sval);  // default delim = \sp
261 
262                     if (stok.hasMoreElements())
263                     {
264                         String target = stok.nextToken();
265                         String data;
266                         if (stok.hasMoreElements())
267                             data = stok.nextToken();
268                         else
269                             data = "";
270 
271                         lexicalHandler.startDTD(target, data, null);
272                     }
273                 }
274             }
275             catch (SAXException ex)
276             {
277                     parser.yyerror(ex.getMessage());
278             }
279 	}
280 
281 	/***
282      *  End the DOCTYPE declaration
283      */
284 	public void endDTD()	{
285             try {
286                 if (lexicalHandler != null)
287                     lexicalHandler.endDTD();
288             }
289             catch (SAXException ex)
290             {
291                     parser.yyerror(ex.getMessage());
292             }
293 	}
294 
295 
296 
297 
298 
299     /***
300      * used by the SaxParser to set itself in ParserDelegate
301      */
302     public void setXMLReader(XMLReader reader) {
303             this.reader = reader;
304 
305         try {
306             if (reader != null)
307             {
308                 contentHandler = reader.getContentHandler(); // good idea to init first
309                 lexicalHandler = (LexicalHandler)reader.getProperty("http://xml.org/sax/properties/lexical-handler");
310             }
311         }
312         catch (SAXNotRecognizedException ex)
313         {
314                 System.err.println("No lexical handler set in property 'http://xml.org/sax/properties/lexical-handler'");
315         }
316         catch (SAXNotSupportedException ex)
317         {
318                 System.err.println("Lexical handler property not supported");
319         }
320 
321     }
322 
323 }