View Javadoc

1   /* The following code was generated by JFlex 1.3.2 on 8/10/01 4:37 PM */
2   
3   
4   package hotsax.html.sax;
5   
6   import java.io.*;
7   import java.util.*;
8   
9   
10  /***
11   * This class is a scanner generated by 
12   * <a href="http://www.jflex.de/">JFlex</a> 1.3.2
13   * on 8/10/01 4:37 PM from the specification file
14   * <tt>file:/home/edh/sourceforge/HotSAX/./build/src/hotsax/html/sax/StyleLexer.flex</tt>
15   */
16  class StyleLexer implements SemanticLexer {
17  
18    /*** This character denotes the end of file */
19    final public static int YYEOF = -1;
20  
21    /*** initial size of the lookahead buffer */
22    final private static int YY_BUFFERSIZE = 16384;
23  
24    /*** lexical states */
25    final public static int ERROR_RECOVER = 2;
26    final public static int IGNORE_CDATA = 1;
27    final public static int YYINITIAL = 0;
28  
29    /*** 
30     * Translates characters to character classes
31     */
32    final private static char [] yycmap = {
33       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
34       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
35       0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  3,  0,  5, 
36       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  4,  0, 
37       0, 14,  0, 12, 13, 10,  0,  0,  0,  0,  0,  0,  9,  0,  0,  0, 
38       0,  0,  0,  6,  7,  0,  0,  0,  0,  8,  0, 11,  0, 15,  0,  0, 
39       0, 14,  0, 12, 13, 10,  0,  0,  0,  0,  0,  0,  9,  0,  0,  0, 
40       0,  0,  0,  6,  7,  0,  0,  0,  0,  8,  0,  0,  0,  0,  0,  0, 
41       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
42       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
43       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
44       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
45       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
46       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
47       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 
48       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
49    };
50  
51    /*** 
52     * Translates a state to a row index in the transition table
53     */
54    final private static int yy_rowMap [] = { 
55          0,    16,    32,    48,    64,    80,    48,    96,   112,   128, 
56        144,   160,   176,   192,   208,   224,   240,   256,   144,   144, 
57        272,   288,   304,   320,   336,   352,   368,   384,   400,   144, 
58        144
59    };
60  
61    /*** 
62     * The packed transition table of the DFA (part 0)
63     */
64    final private static String yy_packed0 = 
65      "\1\4\1\5\1\4\1\6\1\7\13\4\17\10\1\11"+
66      "\4\12\1\13\13\12\1\4\1\0\1\4\1\0\14\4"+
67      "\2\0\1\14\2\0\1\15\15\0\1\16\14\0\17\10"+
68      "\20\0\1\17\4\12\1\0\13\12\23\0\1\20\7\0"+
69      "\1\21\12\0\1\22\15\0\1\23\17\0\1\24\16\0"+
70      "\1\23\30\0\1\25\12\0\1\26\25\0\1\27\12\0"+
71      "\1\30\25\0\1\31\12\0\1\32\15\0\1\33\22\0"+
72      "\1\34\23\0\1\35\5\0\1\36\26\0\1\37\4\0";
73  
74    /*** 
75     * The transition table of the DFA
76     */
77    final private static int yytrans [] = yy_unpack();
78  
79  
80    /* error codes */
81    final private static int YY_UNKNOWN_ERROR = 0;
82    final private static int YY_ILLEGAL_STATE = 1;
83    final private static int YY_NO_MATCH = 2;
84    final private static int YY_PUSHBACK_2BIG = 3;
85  
86    /* error messages for the codes above */
87    final private static String YY_ERROR_MSG[] = {
88      "Unkown internal scanner error",
89      "Internal error: unknown state",
90      "Error: could not match input",
91      "Error: pushback value was too large"
92    };
93  
94    /***
95     * YY_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
96     */
97    private final static byte YY_ATTRIBUTE[] = {
98       1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  9,  0,  0,  0,  0,  0, 
99       0,  0,  9,  9,  0,  0,  0,  0,  0,  0,  0,  0,  0,  9,  9
100   };
101 
102   /*** the input device */
103   private java.io.Reader yy_reader;
104 
105   /*** the current state of the DFA */
106   private int yy_state;
107 
108   /*** the current lexical state */
109   private int yy_lexical_state = YYINITIAL;
110 
111   /*** this buffer contains the current text to be matched and is
112       the source of the yytext() string */
113   private char yy_buffer[] = new char[YY_BUFFERSIZE];
114 
115   /*** the textposition at the last accepting state */
116   private int yy_markedPos;
117 
118   /*** the textposition at the last state to be included in yytext */
119   private int yy_pushbackPos;
120 
121   /*** the current text position in the buffer */
122   private int yy_currentPos;
123 
124   /*** startRead marks the beginning of the yytext() string in the buffer */
125   private int yy_startRead;
126 
127   /*** endRead marks the last character in the buffer, that has been read
128       from input */
129   private int yy_endRead;
130 
131   /*** number of newlines encountered up to the start of the matched text */
132   private int yyline;
133 
134   /*** the number of characters up to the start of the matched text */
135   private int yychar;
136 
137   /***
138    * the number of characters from the last newline up to the start of the 
139    * matched text
140    */
141   private int yycolumn; 
142 
143   /*** 
144    * yy_atBOL == true <=> the scanner is currently at the beginning of a line
145    */
146   private boolean yy_atBOL = true;
147 
148   /*** yy_atEOF == true <=> the scanner is at the EOF */
149   private boolean yy_atEOF;
150 
151   /*** denotes if the user-EOF-code has already been executed */
152   private boolean yy_eof_done;
153 
154   /* user code: */
155 	private HtmlParser yyparser;
156 	private StringBuffer text = new StringBuffer();
157 	private StringBuffer cdata = new StringBuffer();
158 
159 	private Vector tokenQueue;
160 
161 	protected boolean debug = false;
162 	public void setDebug(boolean debug) { this.debug = debug; }
163 	public boolean getDebug() { return debug; }
164 
165 	public void p(String s) { System.out.println(s); }
166 	
167 	public boolean getEOF() { return yy_atEOF; }
168 
169 	public StyleLexer(Reader r, HtmlParser p)
170 	{
171 		this(r);
172 		yyparser = p;
173 		tokenQueue = new Vector();
174 	}
175 
176 	// expected constructor to get called as an alternate SemanticLexer
177 	public StyleLexer(HtmlParser p) {
178 		this(System.in);
179 		yyparser = p;
180 		tokenQueue = new Vector();
181 	}
182 
183 	Lval lexer_yylval;
184 	Lval empty_yylval = new Lval("");
185 	private boolean first = false;
186 	private boolean last = false;
187 
188 	public void yylexerror_reset() {
189 		System.err.println("char at : " + yycharat(yy_markedPos));
190 		yypushback(yylength() - 1);  // attempt at resetting this
191 	}
192 
193 	/*** 
194 	 *   mask the actual implementation of yylex to return the first SOF
195 	 *   and the final EOF. Marking the startDocument, EndDocuemnt events
196 	 *   Also catches supposedly unrecoverable Error. Forces new ERROR_RECOVER state.
197      *   #return the token from yylex() - one of HtmlParser.XXXXX
198 	 */
199 	public int _yylex() 
200 		throws IOException
201 	{
202 		int token;
203 
204 		lexer_yylval = empty_yylval; 
205 		try {
206 	
207 			if (tokenQueue.size() > 0) {
208 				Object o = tokenQueue.remove(0);
209 				if (o instanceof Lval) {
210 					setLval((Lval)o);
211 					o = tokenQueue.remove(0);
212 				}
213 				token = ((Integer)o).intValue();
214 		    }
215 			else {
216 				token = yylex();
217 				if (token == -2) { // then a force of returning the next item in the tokenQueue
218 					Object o = tokenQueue.remove(0);
219 					if (o instanceof Lval) {
220 						setLval((Lval)o);
221 						o = tokenQueue.remove(0);
222 					}
223 					token = ((Integer)o).intValue();
224 				}
225 			}
226 		}
227 		catch (Error err) {
228 			if (getDebug()) System.err.println("Caught error " + err.getMessage());
229 			yybegin(ERROR_RECOVER);
230 			if (yyparser != null)
231 				yyparser.yylval = empty_yylval;
232 			
233 			token = yylex(); // read ahead in ERROR_REOVER
234 		}		
235 		finally {
236 			cdata.setLength(0);
237 			text.setLength(0);
238 		}
239 		return token;
240 	}
241 	
242 
243 	// set the LH side of the parser
244 	void setLval(String text)
245 	{
246 		lexer_yylval = new Lval(text);
247 			
248 		if (yyparser != null)
249 			yyparser.yylval = lexer_yylval;
250 	}
251 
252 	void setLval(Attribute a)
253 	{
254 		lexer_yylval = new Lval(a);
255 
256 		if (yyparser != null)
257 			yyparser.yylval = lexer_yylval;
258 	}
259 
260 	void setLval(Lval l) {
261 		lexer_yylval = l;
262 
263 		if (yyparser != null)
264 			yyparser.yylval = lexer_yylval;
265 	}
266 
267 
268 	/*** Return the yy_reader for this class. Can be used to provide alternate scanner 
269 	  @return the Reader for this class */
270 	public Reader getReader() { return yy_reader; }
271 	public void setReader(Reader r) { yy_reader = r; }
272 
273 
274 	/***
275   	 * Sets this lexer to the same yybuffer, and character positions as the other lexer
276      */
277 	public void setBuffer(SemanticLexer lexer) {
278 		if (this.yy_buffer.length != lexer.getyyBuffer().length) {
279       		char newBuffer[] = new char[lexer.getyyBuffer().length];
280 			this.yy_buffer = newBuffer;
281 		}
282       	System.arraycopy(lexer.getyyBuffer(), 0, yy_buffer, 0, yy_buffer.length);
283 		this.yy_currentPos = lexer.getyyCurrentPos();
284 		this.yy_markedPos = lexer.getyyMarkedPos();
285 		this.yy_pushbackPos = lexer.getyyPushbackPos();
286 		this.yy_endRead = lexer.getyyEndRead();
287 		this.yy_startRead = lexer.getyyStartRead();
288 	}
289 
290 	public char[] getyyBuffer() { return yy_buffer; }
291 	public int getyyCurrentPos() { return yy_currentPos; } 
292 	public int getyyMarkedPos() { return yy_markedPos; }
293 	public int getyyPushbackPos() { return yy_pushbackPos; }
294 	public int getyyEndRead() { return yy_endRead; }
295 	public int getyyStartRead() { return yy_startRead; }
296 	public void printBuffer() {
297 		for (int i = 0; i < yy_endRead; i++) {
298 			System.out.print(yy_buffer[i]);
299 		}
300 	}
301 
302   /***
303    * Runs the scanner on input files.
304    *
305    * This main method is the debugging routine for the scanner.
306    * It prints each returned token to System.out until the end of
307    * file is reached, or an error occured.
308    *
309    * @param argv   the command line, contains the filenames to run
310    *               the scanner on.
311    */
312   public static void main(String argv[]) {
313     for (int i = 0; i < argv.length; i++) {
314       StyleLexer scanner = null;
315       try {
316         scanner = new StyleLexer( new java.io.FileReader(argv[i]), (HtmlParser)null );
317       }
318       catch (java.io.FileNotFoundException e) {
319         System.out.println("File not found : \""+argv[i]+"\"");
320         System.exit(1);
321       }
322       catch (java.io.IOException e) {
323         System.out.println("Error opening file \""+argv[i]+"\"");
324         System.exit(1);
325       }
326       catch (ArrayIndexOutOfBoundsException e) {
327         System.out.println("Usage : java HtmlLexer <inputfile>");
328         System.exit(1);
329       }
330 
331 	  scanner.setDebug(true);
332       try {
333         do {
334           System.out.println(scanner._yylex() + " : " + scanner.yytext() +  " lval:" + scanner.lexer_yylval);
335         } while (!scanner.yy_atEOF);
336 
337       }
338       catch (java.io.IOException e) {
339         System.out.println("An I/O error occured while scanning :");
340         System.out.println(e);
341         System.exit(1);
342       }
343       catch (Exception e) {
344         e.printStackTrace();
345         System.exit(1);
346       }
347     }
348   }
349 
350 
351   /***
352    * Creates a new scanner
353    * There is also a java.io.InputStream version of this constructor.
354    *
355    * @param   in  the java.io.Reader to read input from.
356    */
357   StyleLexer(java.io.Reader in) {
358     this.yy_reader = in;
359   }
360 
361   /***
362    * Creates a new scanner.
363    * There is also java.io.Reader version of this constructor.
364    *
365    * @param   in  the java.io.Inputstream to read input from.
366    */
367   StyleLexer(java.io.InputStream in) {
368     this(new java.io.InputStreamReader(in));
369   }
370 
371   /*** 
372    * Unpacks the split, compressed DFA transition table.
373    *
374    * @return the unpacked transition table
375    */
376   private static int [] yy_unpack() {
377     int [] trans = new int[416];
378     int offset = 0;
379     offset = yy_unpack(yy_packed0, offset, trans);
380     return trans;
381   }
382 
383   /*** 
384    * Unpacks the compressed DFA transition table.
385    *
386    * @param packed   the packed transition table
387    * @return         the index of the last entry
388    */
389   private static int yy_unpack(String packed, int offset, int [] trans) {
390     int i = 0;       /* index in packed string  */
391     int j = offset;  /* index in unpacked array */
392     int l = packed.length();
393     while (i < l) {
394       int count = packed.charAt(i++);
395       int value = packed.charAt(i++);
396       value--;
397       do trans[j++] = value; while (--count > 0);
398     }
399     return j;
400   }
401 
402 
403   /***
404    * Gets the next input character.
405    *
406    * @return      the next character of the input stream, EOF if the
407    *              end of the stream is reached.
408    * @exception   IOException  if any I/O-Error occurs
409    */
410   private int yy_advance() throws java.io.IOException {
411 
412     /* standard case */
413     if (yy_currentPos < yy_endRead) return yy_buffer[yy_currentPos++];
414 
415     /* if the eof is reached, we don't need to work hard */ 
416     if (yy_atEOF) return YYEOF;
417 
418     /* otherwise: need to refill the buffer */
419 
420     /* first: make room (if you can) */
421     if (yy_startRead > 0) {
422       System.arraycopy(yy_buffer, yy_startRead, 
423                        yy_buffer, 0, 
424                        yy_endRead-yy_startRead);
425 
426       /* translate stored positions */
427       yy_endRead-= yy_startRead;
428       yy_currentPos-= yy_startRead;
429       yy_markedPos-= yy_startRead;
430       yy_pushbackPos-= yy_startRead;
431       yy_startRead = 0;
432     }
433 
434     /* is the buffer big enough? */
435     if (yy_currentPos >= yy_buffer.length) {
436       /* if not: blow it up */
437       char newBuffer[] = new char[yy_currentPos*2];
438       System.arraycopy(yy_buffer, 0, newBuffer, 0, yy_buffer.length);
439       yy_buffer = newBuffer;
440     }
441 
442     /* finally: fill the buffer with new input */
443     int numRead = yy_reader.read(yy_buffer, yy_endRead, 
444                                             yy_buffer.length-yy_endRead);
445 
446     if ( numRead == -1 ) return YYEOF;
447 
448     yy_endRead+= numRead;
449 
450     return yy_buffer[yy_currentPos++];
451   }
452 
453 
454   /***
455    * Closes the input stream.
456    */
457   final public void yyclose() throws java.io.IOException {
458     yy_atEOF = true;            /* indicate end of file */
459     yy_endRead = yy_startRead;  /* invalidate buffer    */
460 
461     if (yy_reader != null)
462       yy_reader.close();
463   }
464 
465 
466   /***
467    * Closes the current stream, and resets the
468    * scanner to read from a new input stream.
469    *
470    * All internal variables are reset, the old input stream 
471    * <b>cannot</b> be reused (internal buffer is discarded and lost).
472    * Lexical state is set to <tt>YY_INITIAL</tt>.
473    *
474    * @param reader   the new input stream 
475    */
476   final public void yyreset(java.io.Reader reader) throws java.io.IOException {
477     yyclose();
478     yy_reader = reader;
479     yy_atBOL  = true;
480     yy_atEOF  = false;
481     yy_endRead = yy_startRead = 0;
482     yy_currentPos = yy_markedPos = yy_pushbackPos = 0;
483     yyline = yychar = yycolumn = 0;
484     yy_lexical_state = YYINITIAL;
485   }
486 
487 
488   /***
489    * Returns the current lexical state.
490    */
491   final public int yystate() {
492     return yy_lexical_state;
493   }
494 
495 
496   /***
497    * Enters a new lexical state
498    *
499    * @param newState the new lexical state
500    */
501   final public void yybegin(int newState) {
502     yy_lexical_state = newState;
503   }
504 
505 
506   /***
507    * Returns the text matched by the current regular expression.
508    */
509   final public String yytext() {
510     return new String( yy_buffer, yy_startRead, yy_markedPos-yy_startRead );
511   }
512 
513 
514   /***
515    * Returns the character at position <tt>pos</tt> from the 
516    * matched text. 
517    * 
518    * It is equivalent to yytext().charAt(pos), but faster
519    *
520    * @param pos the position of the character to fetch. 
521    *            A value from 0 to yylength()-1.
522    *
523    * @return the character at position pos
524    */
525   final public char yycharat(int pos) {
526     return yy_buffer[yy_startRead+pos];
527   }
528 
529 
530   /***
531    * Returns the length of the matched text region.
532    */
533   final public int yylength() {
534     return yy_markedPos-yy_startRead;
535   }
536 
537 
538   /***
539    * Reports an error that occured while scanning.
540    *
541    * In a wellformed scanner (no or only correct usage of 
542    * yypushback(int) and a match-all fallback rule) this method 
543    * will only be called with things that "Can't Possibly Happen".
544    * If this method is called, something is seriously wrong
545    * (e.g. a JFlex bug producing a faulty scanner etc.).
546    *
547    * Usual syntax/scanner level error handling should be done
548    * in error fallback rules.
549    *
550    * @param   errorCode  the code of the errormessage to display
551    */
552   private void yy_ScanError(int errorCode) {
553     String message;
554     try {
555       message = YY_ERROR_MSG[errorCode];
556     }
557     catch (ArrayIndexOutOfBoundsException e) {
558       message = YY_ERROR_MSG[YY_UNKNOWN_ERROR];
559     }
560 
561     throw new Error(message);
562   } 
563 
564 
565   /***
566    * Pushes the specified amount of characters back into the input stream.
567    *
568    * They will be read again by then next call of the scanning method
569    *
570    * @param number  the number of characters to be read again.
571    *                This number must not be greater than yylength()!
572    */
573   private void yypushback(int number)  {
574     if ( number > yylength() )
575       yy_ScanError(YY_PUSHBACK_2BIG);
576 
577     yy_markedPos -= number;
578   }
579 
580 
581   /***
582    * Contains user EOF-code, which will be executed exactly once,
583    * when the end of file is reached
584    */
585   private void yy_do_eof() throws java.io.IOException {
586     if (!yy_eof_done) {
587       yy_eof_done = true;
588       yyclose();
589     }
590   }
591 
592 
593   /***
594    * Resumes scanning until the next regular expression is matched,
595    * the end of input is encountered or an I/O-Error occurs.
596    *
597    * @return      the next token
598    * @exception   IOException  if any I/O-Error occurs
599    */
600   public int yylex() throws java.io.IOException {
601     int yy_input;
602     int yy_action;
603 
604 
605     while (true) {
606 
607       yy_action = -1;
608 
609       yy_currentPos = yy_startRead = yy_markedPos;
610 
611       yy_state = yy_lexical_state;
612 
613 
614       yy_forAction: {
615         while (true) {
616 
617           yy_input = yy_advance();
618 
619           if ( yy_input == YYEOF ) break yy_forAction;
620 
621           int yy_next = yytrans[ yy_rowMap[yy_state] + yycmap[yy_input] ];
622           if (yy_next == -1) break yy_forAction;
623           yy_state = yy_next;
624 
625           int yy_attributes = YY_ATTRIBUTE[yy_state];
626           if ( (yy_attributes & 1) > 0 ) {
627             yy_action = yy_state; 
628             yy_markedPos = yy_currentPos; 
629             if ( (yy_attributes & 8) > 0 ) break yy_forAction;
630           }
631 
632         }
633       }
634 
635 
636       switch (yy_action) {
637 
638         case 19: 
639           { 
640         yybegin(YYINITIAL);
641 		tokenQueue.add(new Lval(cdata.toString()));  
642 		tokenQueue.add(new Integer(HtmlParser.TEXT));
643 		tokenQueue.add(new Integer(HtmlParser.CDATA_END));
644 		return -2;
645      }
646         case 32: break;
647         case 1: 
648         case 7: 
649         case 8: 
650           { 
651 		cdata.append(yytext());
652      }
653         case 33: break;
654         case 4: 
655         case 6: 
656           {  
657 		text.append(yytext());
658 	 }
659         case 34: break;
660         case 0: 
661         case 3: 
662           {  
663 		text.append(yytext());
664 		tokenQueue.add(new Lval(new String(text.toString())));
665 		tokenQueue.add(new Integer(HtmlParser.TEXT));
666 		text.setLength(0);
667 	 }
668         case 35: break;
669         case 30: 
670           { 
671 		yybegin(IGNORE_CDATA);
672 		cdata.setLength(0);
673 		return HtmlParser.CDATA_START;
674 	 }
675         case 36: break;
676         case 29: 
677           { 
678 		tokenQueue.add(new Integer(HtmlParser.ANGLE_END_OPEN)); // </
679 		String name = yytext();
680 		name = name.substring(2, 7);  						    // original case of style, Style, STYLE
681 		tokenQueue.add(new Lval(name));
682 		tokenQueue.add(new Integer(HtmlParser.NAME));			// style
683 		tokenQueue.add(new Integer(HtmlParser.ANGLE_CLOSE));   // >
684 		return -2;
685 	 }
686         case 37: break;
687         case 10: 
688           { 
689 		yybegin(YYINITIAL);
690 		if (getDebug()) System.err.println("ERROR_RECOVER: recovered");
691 		return HtmlParser.ANGLE_CLOSE;
692 	 }
693         case 38: break;
694         case 5: 
695           { 
696 		text.append(yytext());
697 	 }
698         case 39: break;
699         case 2: 
700         case 9: 
701           { 
702 		/* ignore all this till recover */
703 	 }
704         case 40: break;
705         case 18: 
706           {  }
707         case 41: break;
708         default: 
709           if (yy_input == YYEOF && yy_startRead == yy_currentPos) {
710             yy_atEOF = true;
711             yy_do_eof();
712               { return 0; }
713           } 
714           else {
715             yy_ScanError(YY_NO_MATCH);
716           }
717       }
718     }
719   }
720 
721 
722 }