1
2
3
4 package hotsax.html.sax;
5
6 import java.io.*;
7 import java.util.*;
8
9
10 /***
11 * This class is a scanner generated by
12 * <a href="http://www.jflex.de/">JFlex</a> 1.3.2
13 * on 8/10/01 4:37 PM from the specification file
14 * <tt>file:/home/edh/sourceforge/HotSAX/./build/src/hotsax/html/sax/ScriptLexer.flex</tt>
15 */
16 class ScriptLexer implements SemanticLexer {
17
18 /*** This character denotes the end of file */
19 final public static int YYEOF = -1;
20
21 /*** initial size of the lookahead buffer */
22 final private static int YY_BUFFERSIZE = 16384;
23
24 /*** lexical states */
25 final public static int ERROR_RECOVER = 2;
26 final public static int IGNORE_CDATA = 1;
27 final public static int YYINITIAL = 0;
28
29 /***
30 * Translates characters to character classes
31 */
32 final private static char [] yycmap = {
33 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
34 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 5,
36 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0,
37 0, 14, 0, 7, 13, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0,
38 10, 0, 8, 6, 11, 0, 0, 0, 0, 0, 0, 12, 0, 15, 0, 0,
39 0, 14, 0, 7, 13, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0,
40 10, 0, 8, 6, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
49 };
50
51 /***
52 * Translates a state to a row index in the transition table
53 */
54 final private static int yy_rowMap [] = {
55 0, 16, 32, 48, 64, 80, 48, 96, 112, 128,
56 144, 160, 176, 192, 208, 224, 240, 256, 144, 144,
57 272, 288, 304, 320, 336, 352, 368, 384, 400, 416,
58 144, 144
59 };
60
61 /***
62 * The packed transition table of the DFA (part 0)
63 */
64 final private static String yy_packed0 =
65 "\1\4\1\5\1\4\1\6\1\7\13\4\17\10\1\11"+
66 "\4\12\1\13\13\12\1\4\1\0\1\4\1\0\14\4"+
67 "\2\0\1\14\2\0\1\15\15\0\1\16\14\0\17\10"+
68 "\20\0\1\17\4\12\1\0\13\12\23\0\1\20\10\0"+
69 "\1\21\11\0\1\22\15\0\1\23\17\0\1\24\16\0"+
70 "\1\23\23\0\1\25\17\0\1\26\25\0\1\27\12\0"+
71 "\1\30\25\0\1\31\12\0\1\32\21\0\1\33\16\0"+
72 "\1\34\23\0\1\35\14\0\1\36\20\0\1\37\7\0"+
73 "\1\40\13\0";
74
75 /***
76 * The transition table of the DFA
77 */
78 final private static int yytrans [] = yy_unpack();
79
80
81
82 final private static int YY_UNKNOWN_ERROR = 0;
83 final private static int YY_ILLEGAL_STATE = 1;
84 final private static int YY_NO_MATCH = 2;
85 final private static int YY_PUSHBACK_2BIG = 3;
86
87
88 final private static String YY_ERROR_MSG[] = {
89 "Unkown internal scanner error",
90 "Internal error: unknown state",
91 "Error: could not match input",
92 "Error: pushback value was too large"
93 };
94
95 /***
96 * YY_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
97 */
98 private final static byte YY_ATTRIBUTE[] = {
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 0, 0, 0, 0, 0,
100 0, 0, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9
101 };
102
103 /*** the input device */
104 private java.io.Reader yy_reader;
105
106 /*** the current state of the DFA */
107 private int yy_state;
108
109 /*** the current lexical state */
110 private int yy_lexical_state = YYINITIAL;
111
112 /*** this buffer contains the current text to be matched and is
113 the source of the yytext() string */
114 private char yy_buffer[] = new char[YY_BUFFERSIZE];
115
116 /*** the textposition at the last accepting state */
117 private int yy_markedPos;
118
119 /*** the textposition at the last state to be included in yytext */
120 private int yy_pushbackPos;
121
122 /*** the current text position in the buffer */
123 private int yy_currentPos;
124
125 /*** startRead marks the beginning of the yytext() string in the buffer */
126 private int yy_startRead;
127
128 /*** endRead marks the last character in the buffer, that has been read
129 from input */
130 private int yy_endRead;
131
132 /*** number of newlines encountered up to the start of the matched text */
133 private int yyline;
134
135 /*** the number of characters up to the start of the matched text */
136 private int yychar;
137
138 /***
139 * the number of characters from the last newline up to the start of the
140 * matched text
141 */
142 private int yycolumn;
143
144 /***
145 * yy_atBOL == true <=> the scanner is currently at the beginning of a line
146 */
147 private boolean yy_atBOL = true;
148
149 /*** yy_atEOF == true <=> the scanner is at the EOF */
150 private boolean yy_atEOF;
151
152 /*** denotes if the user-EOF-code has already been executed */
153 private boolean yy_eof_done;
154
155
156 private HtmlParser yyparser;
157 private StringBuffer text = new StringBuffer();
158 private StringBuffer cdata = new StringBuffer();
159
160 private Vector tokenQueue;
161
162 protected boolean debug = false;
163 public void setDebug(boolean debug) { this.debug = debug; }
164 public boolean getDebug() { return debug; }
165
166 public void p(String s) { System.out.println(s); }
167
168 public boolean getEOF() { return yy_atEOF; }
169
/**
 * Creates a lexer that reads from the given reader and publishes semantic
 * values to the given parser.
 *
 * @param r the character source
 * @param p the parser to receive yylval updates; may be null
 */
public ScriptLexer(Reader r, HtmlParser p)
{
    this(r);
    this.tokenQueue = new Vector();
    this.yyparser = p;
}
176
177
/**
 * Creates a lexer that reads from standard input and publishes semantic
 * values to the given parser.
 *
 * @param p the parser to receive yylval updates; may be null
 */
public ScriptLexer(HtmlParser p) {
    this(System.in);
    this.tokenQueue = new Vector();
    this.yyparser = p;
}
183
184 Lval lexer_yylval;
185 Lval empty_yylval = new Lval("");
186 private boolean first = false;
187 private boolean last = false;
188
189 public void yylexerror_reset() {
190 System.err.println("char at : " + yycharat(yy_markedPos));
191 yypushback(yylength() - 1);
192 }
193
194 /***
195 * mask the actual implementation of yylex to return the first SOF
196 * and the final EOF. Marking the startDocument, EndDocuemnt events
197 * Also catches supposedly unrecoverable Error. Forces new ERROR_RECOVER state.
198 * #return the token from yylex() - one of HtmlParser.XXXXX
199 */
200 public int _yylex()
201 throws IOException
202 {
203 int token;
204
205 lexer_yylval = empty_yylval;
206 try {
207
208 if (tokenQueue.size() > 0) {
209 Object o = tokenQueue.remove(0);
210 if (o instanceof Lval) {
211 setLval((Lval)o);
212 o = tokenQueue.remove(0);
213 }
214 token = ((Integer)o).intValue();
215 }
216 else {
217 token = yylex();
218 if (token == -2) {
219 Object o = tokenQueue.remove(0);
220 if (o instanceof Lval) {
221 setLval((Lval)o);
222 o = tokenQueue.remove(0);
223 }
224 token = ((Integer)o).intValue();
225 }
226 }
227 }
228 catch (Error err) {
229 if (getDebug()) System.err.println("Caught error " + err.getMessage());
230 yybegin(ERROR_RECOVER);
231 if (yyparser != null)
232 yyparser.yylval = empty_yylval;
233
234 token = yylex();
235 }
236 finally {
237 cdata.setLength(0);
238 text.setLength(0);
239 }
240 return token;
241 }
242
243
244
245 void setLval(String text)
246 {
247 lexer_yylval = new Lval(text);
248
249 if (yyparser != null)
250 yyparser.yylval = lexer_yylval;
251 }
252
253 void setLval(Attribute a)
254 {
255 lexer_yylval = new Lval(a);
256
257 if (yyparser != null)
258 yyparser.yylval = lexer_yylval;
259 }
260
261 void setLval(Lval l) {
262 lexer_yylval = l;
263
264 if (yyparser != null)
265 yyparser.yylval = lexer_yylval;
266 }
267
268
269 /*** Return the yy_reader for this class. Can be used to provide alternate scanner
270 @return the Reader for this class */
271 public Reader getReader() { return yy_reader; }
272 public void setReader(Reader r) { yy_reader = r; }
273
274
275 /***
276 * Sets this lexer to the same yybuffer, and character positions as the other lexer
277 */
278 public void setBuffer(SemanticLexer lexer) {
279 if (this.yy_buffer.length != lexer.getyyBuffer().length) {
280 char newBuffer[] = new char[lexer.getyyBuffer().length];
281 this.yy_buffer = newBuffer;
282 }
283 System.arraycopy(lexer.getyyBuffer(), 0, yy_buffer, 0, yy_buffer.length);
284 this.yy_currentPos = lexer.getyyCurrentPos();
285 this.yy_markedPos = lexer.getyyMarkedPos();
286 this.yy_pushbackPos = lexer.getyyPushbackPos();
287 this.yy_endRead = lexer.getyyEndRead();
288 this.yy_startRead = lexer.getyyStartRead();
289 }
290
291 public char[] getyyBuffer() { return yy_buffer; }
292 public int getyyCurrentPos() { return yy_currentPos; }
293 public int getyyMarkedPos() { return yy_markedPos; }
294 public int getyyPushbackPos() { return yy_pushbackPos; }
295 public int getyyEndRead() { return yy_endRead; }
296 public int getyyStartRead() { return yy_startRead; }
297 public void printBuffer() {
298 for (int i = 0; i < yy_endRead; i++) {
299 System.out.print(yy_buffer[i]);
300 }
301 }
302
303 /***
304 * Runs the scanner on input files.
305 *
306 * This main method is the debugging routine for the scanner.
307 * It prints each returned token to System.out until the end of
308 * file is reached, or an error occured.
309 *
310 * @param argv the command line, contains the filenames to run
311 * the scanner on.
312 */
313 public static void main(String argv[]) {
314 for (int i = 0; i < argv.length; i++) {
315 ScriptLexer scanner = null;
316 try {
317 scanner = new ScriptLexer( new java.io.FileReader(argv[i]), (HtmlParser)null );
318 }
319 catch (java.io.FileNotFoundException e) {
320 System.out.println("File not found : \""+argv[i]+"\"");
321 System.exit(1);
322 }
323 catch (java.io.IOException e) {
324 System.out.println("Error opening file \""+argv[i]+"\"");
325 System.exit(1);
326 }
327 catch (ArrayIndexOutOfBoundsException e) {
328 System.out.println("Usage : java HtmlLexer <inputfile>");
329 System.exit(1);
330 }
331
332 scanner.setDebug(true);
333 try {
334 do {
335 System.out.println(scanner._yylex() + " : " + scanner.yytext() + " lval:" + scanner.lexer_yylval);
336 } while (!scanner.yy_atEOF);
337
338 }
339 catch (java.io.IOException e) {
340 System.out.println("An I/O error occured while scanning :");
341 System.out.println(e);
342 System.exit(1);
343 }
344 catch (Exception e) {
345 e.printStackTrace();
346 System.exit(1);
347 }
348 }
349 }
350
351
/**
 * Creates a new scanner that reads from a character stream.
 * A java.io.InputStream variant of this constructor also exists.
 *
 * @param in the java.io.Reader to read input from.
 */
ScriptLexer(Reader in) {
    yy_reader = in;
}
361
/**
 * Creates a new scanner that reads from a byte stream, decoded by an
 * InputStreamReader created without an explicit charset.
 * A java.io.Reader variant of this constructor also exists.
 *
 * @param in the java.io.InputStream to read input from.
 */
ScriptLexer(InputStream in) {
    this(new InputStreamReader(in));
}
371
372 /***
373 * Unpacks the split, compressed DFA transition table.
374 *
375 * @return the unpacked transition table
376 */
377 private static int [] yy_unpack() {
378 int [] trans = new int[432];
379 int offset = 0;
380 offset = yy_unpack(yy_packed0, offset, trans);
381 return trans;
382 }
383
384 /***
385 * Unpacks the compressed DFA transition table.
386 *
387 * @param packed the packed transition table
388 * @return the index of the last entry
389 */
390 private static int yy_unpack(String packed, int offset, int [] trans) {
391 int i = 0;
392 int j = offset;
393 int l = packed.length();
394 while (i < l) {
395 int count = packed.charAt(i++);
396 int value = packed.charAt(i++);
397 value--;
398 do trans[j++] = value; while (--count > 0);
399 }
400 return j;
401 }
402
403
404 /***
405 * Gets the next input character.
406 *
407 * @return the next character of the input stream, EOF if the
408 * end of the stream is reached.
409 * @exception IOException if any I/O-Error occurs
410 */
411 private int yy_advance() throws java.io.IOException {
412
413
414 if (yy_currentPos < yy_endRead) return yy_buffer[yy_currentPos++];
415
416
417 if (yy_atEOF) return YYEOF;
418
419
420
421
422 if (yy_startRead > 0) {
423 System.arraycopy(yy_buffer, yy_startRead,
424 yy_buffer, 0,
425 yy_endRead-yy_startRead);
426
427
428 yy_endRead-= yy_startRead;
429 yy_currentPos-= yy_startRead;
430 yy_markedPos-= yy_startRead;
431 yy_pushbackPos-= yy_startRead;
432 yy_startRead = 0;
433 }
434
435
436 if (yy_currentPos >= yy_buffer.length) {
437
438 char newBuffer[] = new char[yy_currentPos*2];
439 System.arraycopy(yy_buffer, 0, newBuffer, 0, yy_buffer.length);
440 yy_buffer = newBuffer;
441 }
442
443
444 int numRead = yy_reader.read(yy_buffer, yy_endRead,
445 yy_buffer.length-yy_endRead);
446
447 if ( numRead == -1 ) return YYEOF;
448
449 yy_endRead+= numRead;
450
451 return yy_buffer[yy_currentPos++];
452 }
453
454
455 /***
456 * Closes the input stream.
457 */
458 final public void yyclose() throws java.io.IOException {
459 yy_atEOF = true;
460 yy_endRead = yy_startRead;
461
462 if (yy_reader != null)
463 yy_reader.close();
464 }
465
466
467 /***
468 * Closes the current stream, and resets the
469 * scanner to read from a new input stream.
470 *
471 * All internal variables are reset, the old input stream
472 * <b>cannot</b> be reused (internal buffer is discarded and lost).
473 * Lexical state is set to <tt>YY_INITIAL</tt>.
474 *
475 * @param reader the new input stream
476 */
477 final public void yyreset(java.io.Reader reader) throws java.io.IOException {
478 yyclose();
479 yy_reader = reader;
480 yy_atBOL = true;
481 yy_atEOF = false;
482 yy_endRead = yy_startRead = 0;
483 yy_currentPos = yy_markedPos = yy_pushbackPos = 0;
484 yyline = yychar = yycolumn = 0;
485 yy_lexical_state = YYINITIAL;
486 }
487
488
489 /***
490 * Returns the current lexical state.
491 */
492 final public int yystate() {
493 return yy_lexical_state;
494 }
495
496
497 /***
498 * Enters a new lexical state
499 *
500 * @param newState the new lexical state
501 */
502 final public void yybegin(int newState) {
503 yy_lexical_state = newState;
504 }
505
506
507 /***
508 * Returns the text matched by the current regular expression.
509 */
510 final public String yytext() {
511 return new String( yy_buffer, yy_startRead, yy_markedPos-yy_startRead );
512 }
513
514
515 /***
516 * Returns the character at position <tt>pos</tt> from the
517 * matched text.
518 *
519 * It is equivalent to yytext().charAt(pos), but faster
520 *
521 * @param pos the position of the character to fetch.
522 * A value from 0 to yylength()-1.
523 *
524 * @return the character at position pos
525 */
526 final public char yycharat(int pos) {
527 return yy_buffer[yy_startRead+pos];
528 }
529
530
531 /***
532 * Returns the length of the matched text region.
533 */
534 final public int yylength() {
535 return yy_markedPos-yy_startRead;
536 }
537
538
539 /***
540 * Reports an error that occured while scanning.
541 *
542 * In a wellformed scanner (no or only correct usage of
543 * yypushback(int) and a match-all fallback rule) this method
544 * will only be called with things that "Can't Possibly Happen".
545 * If this method is called, something is seriously wrong
546 * (e.g. a JFlex bug producing a faulty scanner etc.).
547 *
548 * Usual syntax/scanner level error handling should be done
549 * in error fallback rules.
550 *
551 * @param errorCode the code of the errormessage to display
552 */
553 private void yy_ScanError(int errorCode) {
554 String message;
555 try {
556 message = YY_ERROR_MSG[errorCode];
557 }
558 catch (ArrayIndexOutOfBoundsException e) {
559 message = YY_ERROR_MSG[YY_UNKNOWN_ERROR];
560 }
561
562 throw new Error(message);
563 }
564
565
566 /***
567 * Pushes the specified amount of characters back into the input stream.
568 *
569 * They will be read again by then next call of the scanning method
570 *
571 * @param number the number of characters to be read again.
572 * This number must not be greater than yylength()!
573 */
574 private void yypushback(int number) {
575 if ( number > yylength() )
576 yy_ScanError(YY_PUSHBACK_2BIG);
577
578 yy_markedPos -= number;
579 }
580
581
582 /***
583 * Contains user EOF-code, which will be executed exactly once,
584 * when the end of file is reached
585 */
586 private void yy_do_eof() throws java.io.IOException {
587 if (!yy_eof_done) {
588 yy_eof_done = true;
589 yyclose();
590 }
591 }
592
593
594 /***
595 * Resumes scanning until the next regular expression is matched,
596 * the end of input is encountered or an I/O-Error occurs.
597 *
598 * @return the next token
599 * @exception IOException if any I/O-Error occurs
600 */
601 public int yylex() throws java.io.IOException {
602 int yy_input;
603 int yy_action;
604
605
606 while (true) {
607
608 yy_action = -1;
609
610 yy_currentPos = yy_startRead = yy_markedPos;
611
612 yy_state = yy_lexical_state;
613
614
615 yy_forAction: {
616 while (true) {
617
618 yy_input = yy_advance();
619
620 if ( yy_input == YYEOF ) break yy_forAction;
621
622 int yy_next = yytrans[ yy_rowMap[yy_state] + yycmap[yy_input] ];
623 if (yy_next == -1) break yy_forAction;
624 yy_state = yy_next;
625
626 int yy_attributes = YY_ATTRIBUTE[yy_state];
627 if ( (yy_attributes & 1) > 0 ) {
628 yy_action = yy_state;
629 yy_markedPos = yy_currentPos;
630 if ( (yy_attributes & 8) > 0 ) break yy_forAction;
631 }
632
633 }
634 }
635
636
637 switch (yy_action) {
638
639 case 19:
640 {
641 yybegin(YYINITIAL);
642 tokenQueue.add(new Lval(cdata.toString()));
643 tokenQueue.add(new Integer(HtmlParser.TEXT));
644 tokenQueue.add(new Integer(HtmlParser.CDATA_END));
645 return -2;
646 }
647 case 33: break;
648 case 1:
649 case 7:
650 case 8:
651 {
652 cdata.append(yytext());
653 }
654 case 34: break;
655 case 4:
656 case 6:
657 {
658 text.append(yytext());
659 }
660 case 35: break;
661 case 0:
662 case 3:
663 {
664 text.append(yytext());
665 tokenQueue.add(new Lval(new String(text.toString())));
666 tokenQueue.add(new Integer(HtmlParser.TEXT));
667 text.setLength(0);
668 }
669 case 36: break;
670 case 31:
671 {
672 tokenQueue.add(new Integer(HtmlParser.ANGLE_END_OPEN));
673 String name = yytext();
674 name = name.substring(2, 8);
675 tokenQueue.add(new Lval(name));
676 tokenQueue.add(new Integer(HtmlParser.NAME));
677 tokenQueue.add(new Integer(HtmlParser.ANGLE_CLOSE));
678 return -2;
679 }
680 case 37: break;
681 case 30:
682 {
683 yybegin(IGNORE_CDATA);
684 cdata.setLength(0);
685 return HtmlParser.CDATA_START;
686 }
687 case 38: break;
688 case 10:
689 {
690 yybegin(YYINITIAL);
691 if (getDebug()) System.err.println("ERROR_RECOVER: recovered");
692 return HtmlParser.ANGLE_CLOSE;
693 }
694 case 39: break;
695 case 5:
696 {
697 text.append(yytext());
698 }
699 case 40: break;
700 case 2:
701 case 9:
702 {
703
704 }
705 case 41: break;
706 case 18:
707 { }
708 case 42: break;
709 default:
710 if (yy_input == YYEOF && yy_startRead == yy_currentPos) {
711 yy_atEOF = true;
712 yy_do_eof();
713 { return 0; }
714 }
715 else {
716 yy_ScanError(YY_NO_MATCH);
717 }
718 }
719 }
720 }
721
722
723 }