1
2
3
4 package hotsax.html.sax;
5
6 import java.io.*;
7 import java.util.*;
8
9
10 /***
11 * This class is a scanner generated by
12 * <a href="http://www.jflex.de/">JFlex</a> 1.3.2
13 * on 8/10/01 4:37 PM from the specification file
14 * <tt>file:/home/edh/sourceforge/HotSAX/./build/src/hotsax/html/sax/StyleLexer.flex</tt>
15 */
16 class StyleLexer implements SemanticLexer {
17
/** Value returned by yy_advance() when no more input is available. */
final public static int YYEOF = -1;

/** Initial size, in chars, of the lookahead buffer (yy_buffer). */
final private static int YY_BUFFERSIZE = 16384;

/**
 * Lexical states. yylex() copies the current lexical state directly into
 * the DFA state, so each value is also the DFA start state for that mode.
 */
final public static int ERROR_RECOVER = 2;
final public static int IGNORE_CDATA = 1;
final public static int YYINITIAL = 0;
28
/**
 * Translates characters to character classes.
 *
 * The table has 256 entries and is indexed by the character code.
 * Non-zero classes in this table:
 *   1 '<'   2 '!'   3 '-'   4 '>'   5 '/'
 *   6 S/s   7 T/t   8 Y/y   9 L/l  10 E/e
 *  11 '['  12 C/c  13 D/d  14 A/a  15 ']'
 * Every other character code below 256 is class 0.
 */
final private static char [] yycmap = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 5,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0,
0, 14, 0, 12, 13, 10, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0,
0, 0, 0, 6, 7, 0, 0, 0, 0, 8, 0, 11, 0, 15, 0, 0,
0, 14, 0, 12, 13, 10, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0,
0, 0, 0, 6, 7, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
50
/**
 * Translates a DFA state to the offset of its row in the transition table.
 *
 * Each row is 16 entries wide (one per character class). Several states
 * share a row: e.g. states 3 and 6 both map to offset 48, and states
 * 10, 18, 19, 29 and 30 all map to offset 144.
 */
final private static int yy_rowMap [] = {
0, 16, 32, 48, 64, 80, 48, 96, 112, 128,
144, 160, 176, 192, 208, 224, 240, 256, 144, 144,
272, 288, 304, 320, 336, 352, 368, 384, 400, 144,
144
};
60
/**
 * The packed transition table of the DFA (part 0).
 *
 * Run-length encoded as (count, value + 1) character pairs; see
 * yy_unpack(String, int, int[]) for the decoding.
 */
final private static String yy_packed0 =
"\1\4\1\5\1\4\1\6\1\7\13\4\17\10\1\11"+
"\4\12\1\13\13\12\1\4\1\0\1\4\1\0\14\4"+
"\2\0\1\14\2\0\1\15\15\0\1\16\14\0\17\10"+
"\20\0\1\17\4\12\1\0\13\12\23\0\1\20\7\0"+
"\1\21\12\0\1\22\15\0\1\23\17\0\1\24\16\0"+
"\1\23\30\0\1\25\12\0\1\26\25\0\1\27\12\0"+
"\1\30\25\0\1\31\12\0\1\32\15\0\1\33\22\0"+
"\1\34\23\0\1\35\5\0\1\36\26\0\1\37\4\0";

/**
 * The unpacked transition table of the DFA, indexed by
 * yy_rowMap[state] + characterClass. An entry of -1 means "no transition".
 */
final private static int yytrans [] = yy_unpack();
78
79
80
/* Error codes accepted by yy_ScanError(int); each one indexes YY_ERROR_MSG. */
final private static int YY_UNKNOWN_ERROR = 0;
final private static int YY_ILLEGAL_STATE = 1;
final private static int YY_NO_MATCH = 2;
final private static int YY_PUSHBACK_2BIG = 3;


/* Error messages for the codes above, in the same order. */
final private static String YY_ERROR_MSG[] = {
"Unkown internal scanner error",
"Internal error: unknown state",
"Error: could not match input",
"Error: pushback value was too large"
};
93
/**
 * YY_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>.
 *
 * As used by yylex(): bit 1 marks an accepting state (the state number
 * becomes the action to run) and bit 8 marks a state at which scanning of
 * the current token stops immediately.
 */
private final static byte YY_ATTRIBUTE[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 0, 0, 0, 0, 0,
0, 0, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9
};
101
/** The input device. */
private java.io.Reader yy_reader;

/** The current state of the DFA. */
private int yy_state;

/** The current lexical state; one of YYINITIAL, IGNORE_CDATA, ERROR_RECOVER. */
private int yy_lexical_state = YYINITIAL;

/** This buffer contains the current text to be matched and is
    the source of the yytext() string. */
private char yy_buffer[] = new char[YY_BUFFERSIZE];

/** The text position at the last accepting state; yytext() ends just before it. */
private int yy_markedPos;

/** The text position at the last state to be included in yytext. */
private int yy_pushbackPos;

/** The current text position in the buffer. */
private int yy_currentPos;

/** startRead marks the beginning of the yytext() string in the buffer. */
private int yy_startRead;

/** endRead marks the end of the characters in the buffer that have been
    read from input; yy_advance() serves buffered characters while
    yy_currentPos is below it. */
private int yy_endRead;

/** Number of newlines encountered up to the start of the matched text.
    Only reset by yyreset() in the code visible here. */
private int yyline;

/** The number of characters up to the start of the matched text.
    Only reset by yyreset() in the code visible here. */
private int yychar;

/**
 * The number of characters from the last newline up to the start of the
 * matched text. Only reset by yyreset() in the code visible here.
 */
private int yycolumn;

/**
 * yy_atBOL == true <=> the scanner is currently at the beginning of a line.
 */
private boolean yy_atBOL = true;

/** yy_atEOF == true <=> the scanner is at the EOF. */
private boolean yy_atEOF;

/** Denotes whether the user EOF code (yy_do_eof) has already been executed. */
private boolean yy_eof_done;
153
154
155 private HtmlParser yyparser;
156 private StringBuffer text = new StringBuffer();
157 private StringBuffer cdata = new StringBuffer();
158
159 private Vector tokenQueue;
160
161 protected boolean debug = false;
162 public void setDebug(boolean debug) { this.debug = debug; }
163 public boolean getDebug() { return debug; }
164
165 public void p(String s) { System.out.println(s); }
166
167 public boolean getEOF() { return yy_atEOF; }
168
/**
 * Creates a scanner that reads from the given reader and reports semantic
 * values to the given parser.
 *
 * @param r the character source
 * @param p the parser to receive semantic values; may be null
 */
public StyleLexer(Reader r, HtmlParser p)
{
    this(r);
    this.tokenQueue = new Vector();
    this.yyparser = p;
}
175
176
/**
 * Creates a scanner that reads from standard input and reports semantic
 * values to the given parser.
 *
 * @param p the parser to receive semantic values; may be null
 */
public StyleLexer(HtmlParser p) {
    this(System.in);
    this.tokenQueue = new Vector();
    this.yyparser = p;
}
182
/** Semantic value of the current token; assigned by _yylex() and setLval(...). */
Lval lexer_yylval;
/** Placeholder value installed at the start of every _yylex() call. */
Lval empty_yylval = new Lval("");
// Neither 'first' nor 'last' is read or written anywhere in the visible part of this class.
private boolean first = false;
private boolean last = false;
187
188 public void yylexerror_reset() {
189 System.err.println("char at : " + yycharat(yy_markedPos));
190 yypushback(yylength() - 1);
191 }
192
193 /***
194 * mask the actual implementation of yylex to return the first SOF
195 * and the final EOF. Marking the startDocument, EndDocuemnt events
196 * Also catches supposedly unrecoverable Error. Forces new ERROR_RECOVER state.
197 * #return the token from yylex() - one of HtmlParser.XXXXX
198 */
199 public int _yylex()
200 throws IOException
201 {
202 int token;
203
204 lexer_yylval = empty_yylval;
205 try {
206
207 if (tokenQueue.size() > 0) {
208 Object o = tokenQueue.remove(0);
209 if (o instanceof Lval) {
210 setLval((Lval)o);
211 o = tokenQueue.remove(0);
212 }
213 token = ((Integer)o).intValue();
214 }
215 else {
216 token = yylex();
217 if (token == -2) {
218 Object o = tokenQueue.remove(0);
219 if (o instanceof Lval) {
220 setLval((Lval)o);
221 o = tokenQueue.remove(0);
222 }
223 token = ((Integer)o).intValue();
224 }
225 }
226 }
227 catch (Error err) {
228 if (getDebug()) System.err.println("Caught error " + err.getMessage());
229 yybegin(ERROR_RECOVER);
230 if (yyparser != null)
231 yyparser.yylval = empty_yylval;
232
233 token = yylex();
234 }
235 finally {
236 cdata.setLength(0);
237 text.setLength(0);
238 }
239 return token;
240 }
241
242
243
244 void setLval(String text)
245 {
246 lexer_yylval = new Lval(text);
247
248 if (yyparser != null)
249 yyparser.yylval = lexer_yylval;
250 }
251
252 void setLval(Attribute a)
253 {
254 lexer_yylval = new Lval(a);
255
256 if (yyparser != null)
257 yyparser.yylval = lexer_yylval;
258 }
259
260 void setLval(Lval l) {
261 lexer_yylval = l;
262
263 if (yyparser != null)
264 yyparser.yylval = lexer_yylval;
265 }
266
267
268 /*** Return the yy_reader for this class. Can be used to provide alternate scanner
269 @return the Reader for this class */
270 public Reader getReader() { return yy_reader; }
271 public void setReader(Reader r) { yy_reader = r; }
272
273
274 /***
275 * Sets this lexer to the same yybuffer, and character positions as the other lexer
276 */
277 public void setBuffer(SemanticLexer lexer) {
278 if (this.yy_buffer.length != lexer.getyyBuffer().length) {
279 char newBuffer[] = new char[lexer.getyyBuffer().length];
280 this.yy_buffer = newBuffer;
281 }
282 System.arraycopy(lexer.getyyBuffer(), 0, yy_buffer, 0, yy_buffer.length);
283 this.yy_currentPos = lexer.getyyCurrentPos();
284 this.yy_markedPos = lexer.getyyMarkedPos();
285 this.yy_pushbackPos = lexer.getyyPushbackPos();
286 this.yy_endRead = lexer.getyyEndRead();
287 this.yy_startRead = lexer.getyyStartRead();
288 }
289
290 public char[] getyyBuffer() { return yy_buffer; }
291 public int getyyCurrentPos() { return yy_currentPos; }
292 public int getyyMarkedPos() { return yy_markedPos; }
293 public int getyyPushbackPos() { return yy_pushbackPos; }
294 public int getyyEndRead() { return yy_endRead; }
295 public int getyyStartRead() { return yy_startRead; }
296 public void printBuffer() {
297 for (int i = 0; i < yy_endRead; i++) {
298 System.out.print(yy_buffer[i]);
299 }
300 }
301
302 /***
303 * Runs the scanner on input files.
304 *
305 * This main method is the debugging routine for the scanner.
306 * It prints each returned token to System.out until the end of
307 * file is reached, or an error occured.
308 *
309 * @param argv the command line, contains the filenames to run
310 * the scanner on.
311 */
312 public static void main(String argv[]) {
313 for (int i = 0; i < argv.length; i++) {
314 StyleLexer scanner = null;
315 try {
316 scanner = new StyleLexer( new java.io.FileReader(argv[i]), (HtmlParser)null );
317 }
318 catch (java.io.FileNotFoundException e) {
319 System.out.println("File not found : \""+argv[i]+"\"");
320 System.exit(1);
321 }
322 catch (java.io.IOException e) {
323 System.out.println("Error opening file \""+argv[i]+"\"");
324 System.exit(1);
325 }
326 catch (ArrayIndexOutOfBoundsException e) {
327 System.out.println("Usage : java HtmlLexer <inputfile>");
328 System.exit(1);
329 }
330
331 scanner.setDebug(true);
332 try {
333 do {
334 System.out.println(scanner._yylex() + " : " + scanner.yytext() + " lval:" + scanner.lexer_yylval);
335 } while (!scanner.yy_atEOF);
336
337 }
338 catch (java.io.IOException e) {
339 System.out.println("An I/O error occured while scanning :");
340 System.out.println(e);
341 System.exit(1);
342 }
343 catch (Exception e) {
344 e.printStackTrace();
345 System.exit(1);
346 }
347 }
348 }
349
350
/**
 * Creates a new scanner that takes its input from a character reader.
 * A java.io.InputStream flavour of this constructor exists as well.
 *
 * @param in the java.io.Reader to read input from.
 */
StyleLexer(java.io.Reader in) {
    yy_reader = in;
}
360
/**
 * Creates a new scanner that takes its input from a byte stream, decoded
 * through an InputStreamReader created without an explicit charset.
 * A java.io.Reader flavour of this constructor exists as well.
 *
 * @param in the java.io.InputStream to read input from.
 */
StyleLexer(java.io.InputStream in) {
    this((java.io.Reader) new java.io.InputStreamReader(in));
}
370
371 /***
372 * Unpacks the split, compressed DFA transition table.
373 *
374 * @return the unpacked transition table
375 */
376 private static int [] yy_unpack() {
377 int [] trans = new int[416];
378 int offset = 0;
379 offset = yy_unpack(yy_packed0, offset, trans);
380 return trans;
381 }
382
383 /***
384 * Unpacks the compressed DFA transition table.
385 *
386 * @param packed the packed transition table
387 * @return the index of the last entry
388 */
389 private static int yy_unpack(String packed, int offset, int [] trans) {
390 int i = 0;
391 int j = offset;
392 int l = packed.length();
393 while (i < l) {
394 int count = packed.charAt(i++);
395 int value = packed.charAt(i++);
396 value--;
397 do trans[j++] = value; while (--count > 0);
398 }
399 return j;
400 }
401
402
403 /***
404 * Gets the next input character.
405 *
406 * @return the next character of the input stream, EOF if the
407 * end of the stream is reached.
408 * @exception IOException if any I/O-Error occurs
409 */
410 private int yy_advance() throws java.io.IOException {
411
412
413 if (yy_currentPos < yy_endRead) return yy_buffer[yy_currentPos++];
414
415
416 if (yy_atEOF) return YYEOF;
417
418
419
420
421 if (yy_startRead > 0) {
422 System.arraycopy(yy_buffer, yy_startRead,
423 yy_buffer, 0,
424 yy_endRead-yy_startRead);
425
426
427 yy_endRead-= yy_startRead;
428 yy_currentPos-= yy_startRead;
429 yy_markedPos-= yy_startRead;
430 yy_pushbackPos-= yy_startRead;
431 yy_startRead = 0;
432 }
433
434
435 if (yy_currentPos >= yy_buffer.length) {
436
437 char newBuffer[] = new char[yy_currentPos*2];
438 System.arraycopy(yy_buffer, 0, newBuffer, 0, yy_buffer.length);
439 yy_buffer = newBuffer;
440 }
441
442
443 int numRead = yy_reader.read(yy_buffer, yy_endRead,
444 yy_buffer.length-yy_endRead);
445
446 if ( numRead == -1 ) return YYEOF;
447
448 yy_endRead+= numRead;
449
450 return yy_buffer[yy_currentPos++];
451 }
452
453
454 /***
455 * Closes the input stream.
456 */
457 final public void yyclose() throws java.io.IOException {
458 yy_atEOF = true;
459 yy_endRead = yy_startRead;
460
461 if (yy_reader != null)
462 yy_reader.close();
463 }
464
465
466 /***
467 * Closes the current stream, and resets the
468 * scanner to read from a new input stream.
469 *
470 * All internal variables are reset, the old input stream
471 * <b>cannot</b> be reused (internal buffer is discarded and lost).
472 * Lexical state is set to <tt>YY_INITIAL</tt>.
473 *
474 * @param reader the new input stream
475 */
476 final public void yyreset(java.io.Reader reader) throws java.io.IOException {
477 yyclose();
478 yy_reader = reader;
479 yy_atBOL = true;
480 yy_atEOF = false;
481 yy_endRead = yy_startRead = 0;
482 yy_currentPos = yy_markedPos = yy_pushbackPos = 0;
483 yyline = yychar = yycolumn = 0;
484 yy_lexical_state = YYINITIAL;
485 }
486
487
488 /***
489 * Returns the current lexical state.
490 */
491 final public int yystate() {
492 return yy_lexical_state;
493 }
494
495
496 /***
497 * Enters a new lexical state
498 *
499 * @param newState the new lexical state
500 */
501 final public void yybegin(int newState) {
502 yy_lexical_state = newState;
503 }
504
505
506 /***
507 * Returns the text matched by the current regular expression.
508 */
509 final public String yytext() {
510 return new String( yy_buffer, yy_startRead, yy_markedPos-yy_startRead );
511 }
512
513
514 /***
515 * Returns the character at position <tt>pos</tt> from the
516 * matched text.
517 *
518 * It is equivalent to yytext().charAt(pos), but faster
519 *
520 * @param pos the position of the character to fetch.
521 * A value from 0 to yylength()-1.
522 *
523 * @return the character at position pos
524 */
525 final public char yycharat(int pos) {
526 return yy_buffer[yy_startRead+pos];
527 }
528
529
530 /***
531 * Returns the length of the matched text region.
532 */
533 final public int yylength() {
534 return yy_markedPos-yy_startRead;
535 }
536
537
538 /***
539 * Reports an error that occured while scanning.
540 *
541 * In a wellformed scanner (no or only correct usage of
542 * yypushback(int) and a match-all fallback rule) this method
543 * will only be called with things that "Can't Possibly Happen".
544 * If this method is called, something is seriously wrong
545 * (e.g. a JFlex bug producing a faulty scanner etc.).
546 *
547 * Usual syntax/scanner level error handling should be done
548 * in error fallback rules.
549 *
550 * @param errorCode the code of the errormessage to display
551 */
552 private void yy_ScanError(int errorCode) {
553 String message;
554 try {
555 message = YY_ERROR_MSG[errorCode];
556 }
557 catch (ArrayIndexOutOfBoundsException e) {
558 message = YY_ERROR_MSG[YY_UNKNOWN_ERROR];
559 }
560
561 throw new Error(message);
562 }
563
564
565 /***
566 * Pushes the specified amount of characters back into the input stream.
567 *
568 * They will be read again by then next call of the scanning method
569 *
570 * @param number the number of characters to be read again.
571 * This number must not be greater than yylength()!
572 */
573 private void yypushback(int number) {
574 if ( number > yylength() )
575 yy_ScanError(YY_PUSHBACK_2BIG);
576
577 yy_markedPos -= number;
578 }
579
580
581 /***
582 * Contains user EOF-code, which will be executed exactly once,
583 * when the end of file is reached
584 */
585 private void yy_do_eof() throws java.io.IOException {
586 if (!yy_eof_done) {
587 yy_eof_done = true;
588 yyclose();
589 }
590 }
591
592
593 /***
594 * Resumes scanning until the next regular expression is matched,
595 * the end of input is encountered or an I/O-Error occurs.
596 *
597 * @return the next token
598 * @exception IOException if any I/O-Error occurs
599 */
600 public int yylex() throws java.io.IOException {
601 int yy_input;
602 int yy_action;
603
604
605 while (true) {
606
607 yy_action = -1;
608
609 yy_currentPos = yy_startRead = yy_markedPos;
610
611 yy_state = yy_lexical_state;
612
613
614 yy_forAction: {
615 while (true) {
616
617 yy_input = yy_advance();
618
619 if ( yy_input == YYEOF ) break yy_forAction;
620
621 int yy_next = yytrans[ yy_rowMap[yy_state] + yycmap[yy_input] ];
622 if (yy_next == -1) break yy_forAction;
623 yy_state = yy_next;
624
625 int yy_attributes = YY_ATTRIBUTE[yy_state];
626 if ( (yy_attributes & 1) > 0 ) {
627 yy_action = yy_state;
628 yy_markedPos = yy_currentPos;
629 if ( (yy_attributes & 8) > 0 ) break yy_forAction;
630 }
631
632 }
633 }
634
635
636 switch (yy_action) {
637
638 case 19:
639 {
640 yybegin(YYINITIAL);
641 tokenQueue.add(new Lval(cdata.toString()));
642 tokenQueue.add(new Integer(HtmlParser.TEXT));
643 tokenQueue.add(new Integer(HtmlParser.CDATA_END));
644 return -2;
645 }
646 case 32: break;
647 case 1:
648 case 7:
649 case 8:
650 {
651 cdata.append(yytext());
652 }
653 case 33: break;
654 case 4:
655 case 6:
656 {
657 text.append(yytext());
658 }
659 case 34: break;
660 case 0:
661 case 3:
662 {
663 text.append(yytext());
664 tokenQueue.add(new Lval(new String(text.toString())));
665 tokenQueue.add(new Integer(HtmlParser.TEXT));
666 text.setLength(0);
667 }
668 case 35: break;
669 case 30:
670 {
671 yybegin(IGNORE_CDATA);
672 cdata.setLength(0);
673 return HtmlParser.CDATA_START;
674 }
675 case 36: break;
676 case 29:
677 {
678 tokenQueue.add(new Integer(HtmlParser.ANGLE_END_OPEN));
679 String name = yytext();
680 name = name.substring(2, 7);
681 tokenQueue.add(new Lval(name));
682 tokenQueue.add(new Integer(HtmlParser.NAME));
683 tokenQueue.add(new Integer(HtmlParser.ANGLE_CLOSE));
684 return -2;
685 }
686 case 37: break;
687 case 10:
688 {
689 yybegin(YYINITIAL);
690 if (getDebug()) System.err.println("ERROR_RECOVER: recovered");
691 return HtmlParser.ANGLE_CLOSE;
692 }
693 case 38: break;
694 case 5:
695 {
696 text.append(yytext());
697 }
698 case 39: break;
699 case 2:
700 case 9:
701 {
702
703 }
704 case 40: break;
705 case 18:
706 { }
707 case 41: break;
708 default:
709 if (yy_input == YYEOF && yy_startRead == yy_currentPos) {
710 yy_atEOF = true;
711 yy_do_eof();
712 { return 0; }
713 }
714 else {
715 yy_ScanError(YY_NO_MATCH);
716 }
717 }
718 }
719 }
720
721
722 }