View Javadoc

1   /* The following code was generated by JFlex 1.3.2 on 8/10/01 4:37 PM */
2   
3   
4   package hotsax.html.sax;
5   
6   import java.io.*;
7   import java.util.*;
8   
9   
10  /***
11   * This class is a scanner generated by 
12   * <a href="http://www.jflex.de/">JFlex</a> 1.3.2
13   * on 8/10/01 4:37 PM from the specification file
14   * <tt>file:/home/edh/sourceforge/HotSAX/./build/src/hotsax/html/sax/ScriptLexer.flex</tt>
15   */
16  class ScriptLexer implements SemanticLexer {
17  
18    /*** This character denotes the end of file */
19    final public static int YYEOF = -1;
20  
21    /*** initial size of the lookahead buffer */
22    final private static int YY_BUFFERSIZE = 16384;
23  
24    /*** lexical states */
25    final public static int ERROR_RECOVER = 2;
26    final public static int IGNORE_CDATA = 1;
27    final public static int YYINITIAL = 0;
28  
29    /*** 
30     * Translates characters to character classes
31     */
32    final private static char [] yycmap = {
33       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
34       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
35       0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  3,  0,  5, 
36       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  4,  0, 
37       0, 14,  0,  7, 13,  0,  0,  0,  0,  9,  0,  0,  0,  0,  0,  0, 
38      10,  0,  8,  6, 11,  0,  0,  0,  0,  0,  0, 12,  0, 15,  0,  0, 
39       0, 14,  0,  7, 13,  0,  0,  0,  0,  9,  0,  0,  0,  0,  0,  0, 
40      10,  0,  8,  6, 11,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
41       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
42       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
43       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
44       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
45       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
46       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
47       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
48       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
49    };
50  
51    /*** 
52     * Translates a state to a row index in the transition table
53     */
54    final private static int yy_rowMap [] = { 
55          0,    16,    32,    48,    64,    80,    48,    96,   112,   128, 
56        144,   160,   176,   192,   208,   224,   240,   256,   144,   144, 
57        272,   288,   304,   320,   336,   352,   368,   384,   400,   416, 
58        144,   144
59    };
60  
61    /*** 
62     * The packed transition table of the DFA (part 0)
63     */
64    final private static String yy_packed0 = 
65      "\1\4\1\5\1\4\1\6\1\7\13\4\17\10\1\11"+
66      "\4\12\1\13\13\12\1\4\1\0\1\4\1\0\14\4"+
67      "\2\0\1\14\2\0\1\15\15\0\1\16\14\0\17\10"+
68      "\20\0\1\17\4\12\1\0\13\12\23\0\1\20\10\0"+
69      "\1\21\11\0\1\22\15\0\1\23\17\0\1\24\16\0"+
70      "\1\23\23\0\1\25\17\0\1\26\25\0\1\27\12\0"+
71      "\1\30\25\0\1\31\12\0\1\32\21\0\1\33\16\0"+
72      "\1\34\23\0\1\35\14\0\1\36\20\0\1\37\7\0"+
73      "\1\40\13\0";
74  
75    /*** 
76     * The transition table of the DFA
77     */
78    final private static int yytrans [] = yy_unpack();
79  
80  
81    /* error codes */
82    final private static int YY_UNKNOWN_ERROR = 0;
83    final private static int YY_ILLEGAL_STATE = 1;
84    final private static int YY_NO_MATCH = 2;
85    final private static int YY_PUSHBACK_2BIG = 3;
86  
87    /* error messages for the codes above */
88    final private static String YY_ERROR_MSG[] = {
89      "Unkown internal scanner error",
90      "Internal error: unknown state",
91      "Error: could not match input",
92      "Error: pushback value was too large"
93    };
94  
95    /***
96     * YY_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
97     */
98    private final static byte YY_ATTRIBUTE[] = {
99       1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  9,  0,  0,  0,  0,  0, 
100      0,  0,  9,  9,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  9,  9
101   };
102 
103   /*** the input device */
104   private java.io.Reader yy_reader;
105 
106   /*** the current state of the DFA */
107   private int yy_state;
108 
109   /*** the current lexical state */
110   private int yy_lexical_state = YYINITIAL;
111 
112   /*** this buffer contains the current text to be matched and is
113       the source of the yytext() string */
114   private char yy_buffer[] = new char[YY_BUFFERSIZE];
115 
116   /*** the textposition at the last accepting state */
117   private int yy_markedPos;
118 
119   /*** the textposition at the last state to be included in yytext */
120   private int yy_pushbackPos;
121 
122   /*** the current text position in the buffer */
123   private int yy_currentPos;
124 
125   /*** startRead marks the beginning of the yytext() string in the buffer */
126   private int yy_startRead;
127 
128   /*** endRead marks the last character in the buffer, that has been read
129       from input */
130   private int yy_endRead;
131 
132   /*** number of newlines encountered up to the start of the matched text */
133   private int yyline;
134 
135   /*** the number of characters up to the start of the matched text */
136   private int yychar;
137 
138   /***
139    * the number of characters from the last newline up to the start of the 
140    * matched text
141    */
142   private int yycolumn; 
143 
144   /*** 
145    * yy_atBOL == true <=> the scanner is currently at the beginning of a line
146    */
147   private boolean yy_atBOL = true;
148 
149   /*** yy_atEOF == true <=> the scanner is at the EOF */
150   private boolean yy_atEOF;
151 
152   /*** denotes if the user-EOF-code has already been executed */
153   private boolean yy_eof_done;
154 
155   /* user code: */
156 	private HtmlParser yyparser;
157 	private StringBuffer text = new StringBuffer();
158 	private StringBuffer cdata = new StringBuffer();
159 
160 	private Vector tokenQueue;
161 
162 	protected boolean debug = false;
163 	public void setDebug(boolean debug) { this.debug = debug; }
164 	public boolean getDebug() { return debug; }
165 
166 	public void p(String s) { System.out.println(s); }
167 	
168 	public boolean getEOF() { return yy_atEOF; }
169 
/**
 * Creates a scanner that reads from the given reader and publishes its
 * semantic values to the given parser.
 *
 * @param r the reader to scan
 * @param p the parser to receive yylval updates; may be null
 */
public ScriptLexer(Reader r, HtmlParser p)
{
    this(r);
    this.tokenQueue = new Vector();
    this.yyparser = p;
}
176 
/**
 * Constructor expected to be called when this class is plugged in as an
 * alternate SemanticLexer. The scanner initially reads from System.in;
 * setReader / setBuffer can redirect it afterwards.
 *
 * @param p the parser to receive yylval updates; may be null
 */
public ScriptLexer(HtmlParser p) {
    this(System.in);
    this.tokenQueue = new Vector();
    this.yyparser = p;
}
183 
184 	Lval lexer_yylval;
185 	Lval empty_yylval = new Lval("");
186 	private boolean first = false;
187 	private boolean last = false;
188 
189 	public void yylexerror_reset() {
190 		System.err.println("char at : " + yycharat(yy_markedPos));
191 		yypushback(yylength() - 1);  // attempt at resetting this
192 	}
193 
194 	/*** 
195 	 *   mask the actual implementation of yylex to return the first SOF
196 	 *   and the final EOF. Marking the startDocument, EndDocuemnt events
197 	 *   Also catches supposedly unrecoverable Error. Forces new ERROR_RECOVER state.
198      *   #return the token from yylex() - one of HtmlParser.XXXXX
199 	 */
200 	public int _yylex() 
201 		throws IOException
202 	{
203 		int token;
204 
205 		lexer_yylval = empty_yylval; 
206 		try {
207 	
208 			if (tokenQueue.size() > 0) {
209 				Object o = tokenQueue.remove(0);
210 				if (o instanceof Lval) {
211 					setLval((Lval)o);
212 					o = tokenQueue.remove(0);
213 				}
214 				token = ((Integer)o).intValue();
215 		    }
216 			else {
217 				token = yylex();
218 				if (token == -2) { // then a force of returning the next item in the tokenQueue
219 					Object o = tokenQueue.remove(0);
220 					if (o instanceof Lval) {
221 						setLval((Lval)o);
222 						o = tokenQueue.remove(0);
223 					}
224 					token = ((Integer)o).intValue();
225 				}
226 			}
227 		}
228 		catch (Error err) {
229 			if (getDebug()) System.err.println("Caught error " + err.getMessage());
230 			yybegin(ERROR_RECOVER);
231 			if (yyparser != null)
232 				yyparser.yylval = empty_yylval;
233 			
234 			token = yylex(); // read ahead in ERROR_REOVER
235 		}		
236 		finally {
237 			cdata.setLength(0);
238 			text.setLength(0);
239 		}
240 		return token;
241 	}
242 	
243 
244 	// set the LH side of the parser
245 	void setLval(String text)
246 	{
247 		lexer_yylval = new Lval(text);
248 			
249 		if (yyparser != null)
250 			yyparser.yylval = lexer_yylval;
251 	}
252 
253 	void setLval(Attribute a)
254 	{
255 		lexer_yylval = new Lval(a);
256 
257 		if (yyparser != null)
258 			yyparser.yylval = lexer_yylval;
259 	}
260 
261 	void setLval(Lval l) {
262 		lexer_yylval = l;
263 
264 		if (yyparser != null)
265 			yyparser.yylval = lexer_yylval;
266 	}
267 
268 
269 	/*** Return the yy_reader for this class. Can be used to provide alternate scanner 
270 	  @return the Reader for this class */
271 	public Reader getReader() { return yy_reader; }
272 	public void setReader(Reader r) { yy_reader = r; }
273 
274 
275 	/***
276   	 * Sets this lexer to the same yybuffer, and character positions as the other lexer
277      */
278 	public void setBuffer(SemanticLexer lexer) {
279 		if (this.yy_buffer.length != lexer.getyyBuffer().length) {
280       		char newBuffer[] = new char[lexer.getyyBuffer().length];
281 			this.yy_buffer = newBuffer;
282 		}
283       	System.arraycopy(lexer.getyyBuffer(), 0, yy_buffer, 0, yy_buffer.length);
284 		this.yy_currentPos = lexer.getyyCurrentPos();
285 		this.yy_markedPos = lexer.getyyMarkedPos();
286 		this.yy_pushbackPos = lexer.getyyPushbackPos();
287 		this.yy_endRead = lexer.getyyEndRead();
288 		this.yy_startRead = lexer.getyyStartRead();
289 	}
290 
291 	public char[] getyyBuffer() { return yy_buffer; }
292 	public int getyyCurrentPos() { return yy_currentPos; } 
293 	public int getyyMarkedPos() { return yy_markedPos; }
294 	public int getyyPushbackPos() { return yy_pushbackPos; }
295 	public int getyyEndRead() { return yy_endRead; }
296 	public int getyyStartRead() { return yy_startRead; }
297 	public void printBuffer() {
298 		for (int i = 0; i < yy_endRead; i++) {
299 			System.out.print(yy_buffer[i]);
300 		}
301 	}
302 
303   /***
304    * Runs the scanner on input files.
305    *
306    * This main method is the debugging routine for the scanner.
307    * It prints each returned token to System.out until the end of
308    * file is reached, or an error occured.
309    *
310    * @param argv   the command line, contains the filenames to run
311    *               the scanner on.
312    */
313   public static void main(String argv[]) {
314     for (int i = 0; i < argv.length; i++) {
315       ScriptLexer scanner = null;
316       try {
317         scanner = new ScriptLexer( new java.io.FileReader(argv[i]), (HtmlParser)null );
318       }
319       catch (java.io.FileNotFoundException e) {
320         System.out.println("File not found : \""+argv[i]+"\"");
321         System.exit(1);
322       }
323       catch (java.io.IOException e) {
324         System.out.println("Error opening file \""+argv[i]+"\"");
325         System.exit(1);
326       }
327       catch (ArrayIndexOutOfBoundsException e) {
328         System.out.println("Usage : java HtmlLexer <inputfile>");
329         System.exit(1);
330       }
331 
332 	  scanner.setDebug(true);
333       try {
334         do {
335           System.out.println(scanner._yylex() + " : " + scanner.yytext() +  " lval:" + scanner.lexer_yylval);
336         } while (!scanner.yy_atEOF);
337 
338       }
339       catch (java.io.IOException e) {
340         System.out.println("An I/O error occured while scanning :");
341         System.out.println(e);
342         System.exit(1);
343       }
344       catch (Exception e) {
345         e.printStackTrace();
346         System.exit(1);
347       }
348     }
349   }
350 
351 
/**
 * Creates a new scanner that reads from a character stream.
 * A java.io.InputStream variant of this constructor exists as well.
 * Note that this constructor does not create the token queue.
 *
 * @param   in  the java.io.Reader to read input from.
 */
ScriptLexer(java.io.Reader in) {
    yy_reader = in;
}
361 
/**
 * Creates a new scanner that reads from a byte stream, decoded by an
 * InputStreamReader created without an explicit charset.
 * A java.io.Reader variant of this constructor exists as well.
 *
 * @param   in  the java.io.InputStream to read input from.
 */
ScriptLexer(java.io.InputStream in) {
    this(new java.io.InputStreamReader(in));
}
371 
372   /*** 
373    * Unpacks the split, compressed DFA transition table.
374    *
375    * @return the unpacked transition table
376    */
377   private static int [] yy_unpack() {
378     int [] trans = new int[432];
379     int offset = 0;
380     offset = yy_unpack(yy_packed0, offset, trans);
381     return trans;
382   }
383 
384   /*** 
385    * Unpacks the compressed DFA transition table.
386    *
387    * @param packed   the packed transition table
388    * @return         the index of the last entry
389    */
390   private static int yy_unpack(String packed, int offset, int [] trans) {
391     int i = 0;       /* index in packed string  */
392     int j = offset;  /* index in unpacked array */
393     int l = packed.length();
394     while (i < l) {
395       int count = packed.charAt(i++);
396       int value = packed.charAt(i++);
397       value--;
398       do trans[j++] = value; while (--count > 0);
399     }
400     return j;
401   }
402 
403 
404   /***
405    * Gets the next input character.
406    *
407    * @return      the next character of the input stream, EOF if the
408    *              end of the stream is reached.
409    * @exception   IOException  if any I/O-Error occurs
410    */
411   private int yy_advance() throws java.io.IOException {
412 
413     /* standard case */
414     if (yy_currentPos < yy_endRead) return yy_buffer[yy_currentPos++];
415 
416     /* if the eof is reached, we don't need to work hard */ 
417     if (yy_atEOF) return YYEOF;
418 
419     /* otherwise: need to refill the buffer */
420 
421     /* first: make room (if you can) */
422     if (yy_startRead > 0) {
423       System.arraycopy(yy_buffer, yy_startRead, 
424                        yy_buffer, 0, 
425                        yy_endRead-yy_startRead);
426 
427       /* translate stored positions */
428       yy_endRead-= yy_startRead;
429       yy_currentPos-= yy_startRead;
430       yy_markedPos-= yy_startRead;
431       yy_pushbackPos-= yy_startRead;
432       yy_startRead = 0;
433     }
434 
435     /* is the buffer big enough? */
436     if (yy_currentPos >= yy_buffer.length) {
437       /* if not: blow it up */
438       char newBuffer[] = new char[yy_currentPos*2];
439       System.arraycopy(yy_buffer, 0, newBuffer, 0, yy_buffer.length);
440       yy_buffer = newBuffer;
441     }
442 
443     /* finally: fill the buffer with new input */
444     int numRead = yy_reader.read(yy_buffer, yy_endRead, 
445                                             yy_buffer.length-yy_endRead);
446 
447     if ( numRead == -1 ) return YYEOF;
448 
449     yy_endRead+= numRead;
450 
451     return yy_buffer[yy_currentPos++];
452   }
453 
454 
455   /***
456    * Closes the input stream.
457    */
458   final public void yyclose() throws java.io.IOException {
459     yy_atEOF = true;            /* indicate end of file */
460     yy_endRead = yy_startRead;  /* invalidate buffer    */
461 
462     if (yy_reader != null)
463       yy_reader.close();
464   }
465 
466 
467   /***
468    * Closes the current stream, and resets the
469    * scanner to read from a new input stream.
470    *
471    * All internal variables are reset, the old input stream 
472    * <b>cannot</b> be reused (internal buffer is discarded and lost).
473    * Lexical state is set to <tt>YY_INITIAL</tt>.
474    *
475    * @param reader   the new input stream 
476    */
477   final public void yyreset(java.io.Reader reader) throws java.io.IOException {
478     yyclose();
479     yy_reader = reader;
480     yy_atBOL  = true;
481     yy_atEOF  = false;
482     yy_endRead = yy_startRead = 0;
483     yy_currentPos = yy_markedPos = yy_pushbackPos = 0;
484     yyline = yychar = yycolumn = 0;
485     yy_lexical_state = YYINITIAL;
486   }
487 
488 
489   /***
490    * Returns the current lexical state.
491    */
492   final public int yystate() {
493     return yy_lexical_state;
494   }
495 
496 
497   /***
498    * Enters a new lexical state
499    *
500    * @param newState the new lexical state
501    */
502   final public void yybegin(int newState) {
503     yy_lexical_state = newState;
504   }
505 
506 
507   /***
508    * Returns the text matched by the current regular expression.
509    */
510   final public String yytext() {
511     return new String( yy_buffer, yy_startRead, yy_markedPos-yy_startRead );
512   }
513 
514 
515   /***
516    * Returns the character at position <tt>pos</tt> from the 
517    * matched text. 
518    * 
519    * It is equivalent to yytext().charAt(pos), but faster
520    *
521    * @param pos the position of the character to fetch. 
522    *            A value from 0 to yylength()-1.
523    *
524    * @return the character at position pos
525    */
526   final public char yycharat(int pos) {
527     return yy_buffer[yy_startRead+pos];
528   }
529 
530 
531   /***
532    * Returns the length of the matched text region.
533    */
534   final public int yylength() {
535     return yy_markedPos-yy_startRead;
536   }
537 
538 
539   /***
540    * Reports an error that occured while scanning.
541    *
542    * In a wellformed scanner (no or only correct usage of 
543    * yypushback(int) and a match-all fallback rule) this method 
544    * will only be called with things that "Can't Possibly Happen".
545    * If this method is called, something is seriously wrong
546    * (e.g. a JFlex bug producing a faulty scanner etc.).
547    *
548    * Usual syntax/scanner level error handling should be done
549    * in error fallback rules.
550    *
551    * @param   errorCode  the code of the errormessage to display
552    */
553   private void yy_ScanError(int errorCode) {
554     String message;
555     try {
556       message = YY_ERROR_MSG[errorCode];
557     }
558     catch (ArrayIndexOutOfBoundsException e) {
559       message = YY_ERROR_MSG[YY_UNKNOWN_ERROR];
560     }
561 
562     throw new Error(message);
563   } 
564 
565 
566   /***
567    * Pushes the specified amount of characters back into the input stream.
568    *
569    * They will be read again by then next call of the scanning method
570    *
571    * @param number  the number of characters to be read again.
572    *                This number must not be greater than yylength()!
573    */
574   private void yypushback(int number)  {
575     if ( number > yylength() )
576       yy_ScanError(YY_PUSHBACK_2BIG);
577 
578     yy_markedPos -= number;
579   }
580 
581 
582   /***
583    * Contains user EOF-code, which will be executed exactly once,
584    * when the end of file is reached
585    */
586   private void yy_do_eof() throws java.io.IOException {
587     if (!yy_eof_done) {
588       yy_eof_done = true;
589       yyclose();
590     }
591   }
592 
593 
594   /***
595    * Resumes scanning until the next regular expression is matched,
596    * the end of input is encountered or an I/O-Error occurs.
597    *
598    * @return      the next token
599    * @exception   IOException  if any I/O-Error occurs
600    */
601   public int yylex() throws java.io.IOException {
602     int yy_input;
603     int yy_action;
604 
605 
606     while (true) {
607 
608       yy_action = -1;
609 
610       yy_currentPos = yy_startRead = yy_markedPos;
611 
612       yy_state = yy_lexical_state;
613 
614 
615       yy_forAction: {
616         while (true) {
617 
618           yy_input = yy_advance();
619 
620           if ( yy_input == YYEOF ) break yy_forAction;
621 
622           int yy_next = yytrans[ yy_rowMap[yy_state] + yycmap[yy_input] ];
623           if (yy_next == -1) break yy_forAction;
624           yy_state = yy_next;
625 
626           int yy_attributes = YY_ATTRIBUTE[yy_state];
627           if ( (yy_attributes & 1) > 0 ) {
628             yy_action = yy_state; 
629             yy_markedPos = yy_currentPos; 
630             if ( (yy_attributes & 8) > 0 ) break yy_forAction;
631           }
632 
633         }
634       }
635 
636 
637       switch (yy_action) {
638 
639         case 19: 
640           { 
641         yybegin(YYINITIAL);
642 		tokenQueue.add(new Lval(cdata.toString()));  
643 		tokenQueue.add(new Integer(HtmlParser.TEXT));
644 		tokenQueue.add(new Integer(HtmlParser.CDATA_END));
645 		return -2;
646      }
647         case 33: break;
648         case 1: 
649         case 7: 
650         case 8: 
651           { 
652 		cdata.append(yytext());
653      }
654         case 34: break;
655         case 4: 
656         case 6: 
657           {  
658 		text.append(yytext());
659 	 }
660         case 35: break;
661         case 0: 
662         case 3: 
663           {  
664 		text.append(yytext());
665 		tokenQueue.add(new Lval(new String(text.toString())));
666 		tokenQueue.add(new Integer(HtmlParser.TEXT));
667 		text.setLength(0);
668 	 }
669         case 36: break;
670         case 31: 
671           { 
672 		tokenQueue.add(new Integer(HtmlParser.ANGLE_END_OPEN)); // </
673 		String name = yytext();
674 		name = name.substring(2, 8);  						    // original case of script, Script, SCRIPT
675 		tokenQueue.add(new Lval(name));
676 		tokenQueue.add(new Integer(HtmlParser.NAME));			// script
677 		tokenQueue.add(new Integer(HtmlParser.ANGLE_CLOSE));   // >
678 		return -2;
679 	 }
680         case 37: break;
681         case 30: 
682           { 
683 		yybegin(IGNORE_CDATA);
684 		cdata.setLength(0);
685 		return HtmlParser.CDATA_START;
686 	 }
687         case 38: break;
688         case 10: 
689           { 
690 		yybegin(YYINITIAL);
691 		if (getDebug()) System.err.println("ERROR_RECOVER: recovered");
692 		return HtmlParser.ANGLE_CLOSE;
693 	 }
694         case 39: break;
695         case 5: 
696           { 
697 		text.append(yytext());
698 	 }
699         case 40: break;
700         case 2: 
701         case 9: 
702           { 
703 		/* ignore all this till recover */
704 	 }
705         case 41: break;
706         case 18: 
707           {  }
708         case 42: break;
709         default: 
710           if (yy_input == YYEOF && yy_startRead == yy_currentPos) {
711             yy_atEOF = true;
712             yy_do_eof();
713               { return 0; }
714           } 
715           else {
716             yy_ScanError(YY_NO_MATCH);
717           }
718       }
719     }
720   }
721 
722 
723 }