1 package com.instantbank.collections.util;
2
3
4
/**
 * Hand-written tokenizer for a small XML-like markup.
 *
 * <p>The analyzer walks a document string from left to right. Two scanning
 * modes are offered:
 * <ul>
 *   <li>{@link #getToken()} recognises markup punctuation, quoted values and
 *       names;</li>
 *   <li>{@link #getTokenElement()} recognises {@code <}, {@code </} and raw
 *       text running up to the next {@code <}.</li>
 * </ul>
 * After a {@link #TNAME}, {@link #TVALUE} or {@link #TTEXT} token the matched
 * characters are available from {@link #getValue()}.
 *
 * <p>A NUL character ({@code '\0'}) is used internally as the end-of-input
 * sentinel, so a literal NUL inside the document is treated as end of input.
 */
public class LexicalAnalyzer {

  /** Returned when the current character cannot start any token. */
  public static final int TINVALID = -1;
  /** Returned when the end of the document has been reached. */
  public static final int TEND = 0;
  /** The {@code <} token. */
  public static final int TLESSTHAN = 1;
  /** The {@code >} token. */
  public static final int TGREATERTHAN = 2;
  /** The {@code ?} token. */
  public static final int TQUESTION = 3;
  /** The {@code /} token. */
  public static final int TSLASH = 4;
  /** The {@code =} token. */
  public static final int TEQUAL = 5;
  /** The {@code </} token. */
  public static final int TENDTAG = 6;
  /** The {@code <?} token. */
  public static final int TSTARTHEADER = 7;
  /** A name: a letter followed by any run of non-separator characters. */
  public static final int TNAME = 8;
  /** Raw text between tags (only produced by {@link #getTokenElement()}). */
  public static final int TTEXT = 9;
  /** A value enclosed in single or double quotes. */
  public static final int TVALUE = 10;

  /** Display strings for token codes 1..10, indexed by {@code code - 1}. */
  private static final String[] TOKEN_STRINGS =
      {"<", ">", "?", "/", "=", "</", "<?", "Name", "Text", "Value"};

  /** Characters allowed as the first character of a name. */
  private static final String LETTERS =
      "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  /** Characters skipped before every token. */
  private static final String SPACES = "\n \t\r";
  /** Characters that terminate a name. */
  private static final String SEPARATORS = SPACES + "=/<>\"'?";

  /** Sentinel returned by the character accessors once input is exhausted. */
  private static final char END_OF_INPUT = '\0';

  private final String document;
  private final int length;
  /** Index of the next character to be examined. */
  private int currentPosition;
  /** Text of the most recent name, value or text token. */
  private String value;

  /**
   * Creates an analyzer positioned at the start of {@code document}.
   *
   * @param document the text to tokenize; must not be {@code null}
   * @throws NullPointerException if {@code document} is {@code null}
   */
  public LexicalAnalyzer(String document) {
    this.document = document;
    this.length = document.length();
  }

  /**
   * Returns the character at the current position without consuming it.
   *
   * @return the current character, or {@code '\0'} at end of input
   */
  private char getChar() {
    return charAt(currentPosition);
  }

  /**
   * Bounds-checked character access used for the current character and for
   * look-ahead.
   *
   * @param position index into the document
   * @return the character at {@code position}, or {@code '\0'} when
   *         {@code position} is at or past the end of the document
   */
  private char charAt(int position) {
    if (position >= length) {
      return END_OF_INPUT;
    }
    return document.charAt(position);
  }

  /**
   * Scans the next markup token, skipping leading whitespace.
   *
   * <p>For {@link #TNAME} and {@link #TVALUE} the matched text (without the
   * surrounding quotes for a value) is stored for {@link #getValue()}.
   * A quoted value that is not closed before the end of the document yields
   * {@link #TEND}; the characters read so far remain available from
   * {@link #getValue()}.
   *
   * @return one of the {@code T*} token codes; {@link #TINVALID} if the
   *         current character cannot start a token (it is not consumed)
   */
  public int getToken() {
    ignoreSpaces();
    final char c = getChar();
    switch (c) {
      case END_OF_INPUT:
        return TEND;
      case '<':
        return scanOpenAngle(true);
      case '>':
        currentPosition++;
        return TGREATERTHAN;
      case '?':
        currentPosition++;
        return TQUESTION;
      case '/':
        currentPosition++;
        return TSLASH;
      case '=':
        currentPosition++;
        return TEQUAL;
      case '\'':
      case '"':
        return scanQuotedValue(c);
      default:
        return getTokenName(c) ? TNAME : TINVALID;
    }
  }

  /**
   * Scans the next token in element-content mode, skipping leading
   * whitespace: either {@code <}, {@code </}, or a run of text up to the
   * next {@code <} or the end of the document.
   *
   * <p>Because leading whitespace is skipped first, text tokens never start
   * with whitespace; trailing whitespace before the next {@code <} is kept.
   *
   * @return {@link #TEND}, {@link #TENDTAG}, {@link #TLESSTHAN} or
   *         {@link #TTEXT}
   */
  public int getTokenElement() {
    ignoreSpaces();
    final char c = getChar();
    if (c == END_OF_INPUT) {
      return TEND;
    }
    if (c == '<') {
      return scanOpenAngle(false);
    }
    return getTokenText(c) ? TTEXT : TINVALID;
  }

  /**
   * Consumes a token starting with {@code <} at the current position.
   * The look-ahead is bounds-checked, so a {@code <} that is the very last
   * character of the document is reported as {@link #TLESSTHAN} instead of
   * raising an exception.
   *
   * @param allowHeader whether {@code <?} should be recognised as
   *        {@link #TSTARTHEADER}
   * @return {@link #TENDTAG}, {@link #TSTARTHEADER} or {@link #TLESSTHAN}
   */
  private int scanOpenAngle(boolean allowHeader) {
    final char next = charAt(currentPosition + 1);
    if (next == '/') {
      currentPosition += 2;
      return TENDTAG;
    }
    if (allowHeader && next == '?') {
      currentPosition += 2;
      return TSTARTHEADER;
    }
    currentPosition++;
    return TLESSTHAN;
  }

  /**
   * Consumes a quoted value whose opening quote is at the current position.
   *
   * @param quote the opening quote character; the value ends at the next
   *        occurrence of the same character
   * @return {@link #TVALUE} when the closing quote was found, otherwise
   *         {@link #TEND}
   */
  private int scanQuotedValue(char quote) {
    final int start = ++currentPosition; // step over the opening quote
    char c = getChar();
    while (c != END_OF_INPUT && c != quote) {
      currentPosition++;
      c = getChar();
    }
    value = document.substring(start, currentPosition);
    if (c == END_OF_INPUT) {
      return TEND; // unterminated value: report end of input
    }
    currentPosition++; // step over the closing quote
    return TVALUE;
  }

  /**
   * Tries to consume a name starting at the current position. A name starts
   * with an ASCII letter and extends up to (not including) the next
   * separator character or the end of the document.
   *
   * @param par the character at the current position
   * @return {@code true} if a name was consumed and stored as the current
   *         value; {@code false} (nothing consumed) if {@code par} is not a
   *         letter
   */
  private boolean getTokenName(char par) {
    if (LETTERS.indexOf(par) < 0) {
      return false;
    }
    final int start = currentPosition;
    char c;
    do {
      currentPosition++;
      c = getChar();
      // Stopping at END_OF_INPUT is essential: the sentinel is not a
      // separator, so without this check a name that runs to the end of the
      // document would never terminate.
    } while (c != END_OF_INPUT && SEPARATORS.indexOf(c) < 0);
    value = document.substring(start, currentPosition);
    return true;
  }

  /**
   * Consumes text from the current position up to (not including) the next
   * {@code <} or the end of the document and stores it as the current value.
   *
   * @param par the character at the current position
   * @return always {@code true}
   */
  private boolean getTokenText(char par) {
    final int start = currentPosition;
    char c = par;
    while (c != '<' && c != END_OF_INPUT) {
      currentPosition++;
      c = getChar();
    }
    value = document.substring(start, currentPosition);
    return true;
  }

  /**
   * Returns a display string for a token code.
   *
   * @param t a token code from {@link #TLESSTHAN} (1) to {@link #TVALUE} (10)
   * @return the literal text of punctuation tokens, or {@code "Name"},
   *         {@code "Text"}, {@code "Value"} for the corresponding kinds
   * @throws ArrayIndexOutOfBoundsException if {@code t} is outside 1..10,
   *         which includes {@link #TEND} and {@link #TINVALID}
   */
  public String getTokenString(int t) {
    return TOKEN_STRINGS[t - 1];
  }

  /**
   * Returns the text of the most recently scanned name, value or text token.
   *
   * @return the stored text, or {@code null} if no such token has been
   *         scanned yet
   */
  public String getValue() {
    return value;
  }

  /** Advances past spaces, tabs, carriage returns and line feeds. */
  private void ignoreSpaces() {
    char c = getChar();
    while (c != END_OF_INPUT && SPACES.indexOf(c) >= 0) {
      currentPosition++;
      c = getChar();
    }
  }
}
208
209