1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package jTrolog.parser;
23
24 import jTrolog.errors.InvalidTermException;
25 import jTrolog.parser.Parser;
26 import jTrolog.parser.Token;
27
28 import java.io.*;
29 import java.util.LinkedList;
30 import java.util.Arrays;
31
32
33
34
35
36
37
38
39
40
41
42 @SuppressWarnings({ "rawtypes", "unchecked","serial" })
43 class Tokenizer extends StreamTokenizer implements Serializable {
44
45 static final char[] GRAPHIC_CHARS = { '\\', '$', '&', '?', '^', '@', '#', '.', ',', ':', ';', '=', '<', '>', '+', '-', '*', '/', '~' };
46 static {
47 Arrays.sort(Tokenizer.GRAPHIC_CHARS);
48
49
50 }
51
52
53
54 private LinkedList tokenList = new LinkedList();
55
56
57
58 private PushBack pushBack2 = null;
59
60 public Tokenizer(String text) {
61 this(new StringReader(text));
62 }
63
64
65
66
67 public Tokenizer(Reader text) {
68 super(text);
69
70
71 resetSyntax();
72
73
74 wordChars('a', 'z');
75 wordChars('A', 'Z');
76 wordChars('_', '_');
77 wordChars('0', '9');
78
79 ordinaryChar('!');
80
81
82 ordinaryChar('\\');
83 ordinaryChar('$');
84 ordinaryChar('&');
85 ordinaryChar('^');
86 ordinaryChar('@');
87 ordinaryChar('#');
88 ordinaryChar(',');
89 ordinaryChar('.');
90 ordinaryChar(':');
91 ordinaryChar(';');
92 ordinaryChar('=');
93 ordinaryChar('<');
94 ordinaryChar('>');
95 ordinaryChar('+');
96 ordinaryChar('-');
97 ordinaryChar('*');
98 ordinaryChar('/');
99 ordinaryChar('~');
100
101
102 ordinaryChar('\'');
103
104 ordinaryChar('\"');
105
106
107 ordinaryChar('%');
108
109
110
111
112 }
113
114
115
116
117 Token readToken() throws InvalidTermException, IOException {
118 return !tokenList.isEmpty() ? (Token) tokenList.removeFirst() : readNextToken();
119 }
120
121
122
123
124 void unreadToken(Token token) {
125 tokenList.addFirst(token);
126 }
127
128 Token readNextToken() throws IOException, InvalidTermException {
129 int typea;
130 String svala;
131 if (pushBack2 != null) {
132 typea = pushBack2.typea;
133 svala = pushBack2.svala;
134 pushBack2 = null;
135 } else {
136 typea = super.nextToken();
137 svala = sval;
138 }
139
140
141
142
143
144 while (Tokenizer.isWhite(typea)) {
145 typea = super.nextToken();
146 svala = sval;
147 }
148
149
150
151 if (typea == '%') {
152 do {
153 typea = super.nextToken();
154 } while (typea != '\r' && typea != '\n' && typea != TT_EOF);
155 pushBack();
156
157
158 return readNextToken();
159 }
160
161
162 if (typea == '/') {
163 int typeb = super.nextToken();
164 if (typeb == '*') {
165 do {
166 typea = typeb;
167 typeb = super.nextToken();
168 } while (typea != '*' || typeb != '/');
169 return readNextToken();
170 } else {
171 pushBack();
172 }
173 }
174
175
176 if (typea == TT_EOF)
177 return new Token("", Token.EOF);
178 if (typea == '(')
179 return new Token("(", '(');
180 if (typea == ')')
181 return new Token(")", ')');
182 if (typea == '{')
183 return new Token("{", '{');
184 if (typea == '}')
185 return new Token("}", '}');
186 if (typea == '[')
187 return new Token("[", '[');
188 if (typea == ']')
189 return new Token("]", ']');
190 if (typea == '|')
191 return new Token("|", '|');
192
193 if (typea == '!')
194 return new Token("!", Token.ATOM);
195 if (typea == ',')
196 return new Token(",", Token.OPERATOR);
197
198
199
200 if (typea == '.') {
201 int typeb = super.nextToken();
202 pushBack();
203 if (Tokenizer.isWhite(typeb) || typeb == '%' || typeb == StreamTokenizer.TT_EOF)
204 return new Token(".", '.');
205 }
206
207 boolean isNumber = false;
208
209
210 if (typea == TT_WORD) {
211 char firstChar = svala.charAt(0);
212
213 if (Character.isUpperCase(firstChar) || '_' == firstChar)
214 return new Token(svala, Token.VARIABLE);
215
216 else if (firstChar >= '0' && firstChar <= '9')
217
218
219 isNumber = true;
220
221 else {
222 int typeb = super.nextToken();
223
224 pushBack();
225
226 if (typeb == '(')
227 return new Token(svala, Token.ATOM_FUNCTOR);
228 if (Tokenizer.isWhite(typeb))
229 return new Token(svala, Token.ATOM_OPERATOR);
230 return new Token(svala, Token.ATOM);
231 }
232 }
233
234
235 if (typea == '\'' || typea == '\"' || typea == '`') {
236 int qType = typea;
237 StringBuffer quote = new StringBuffer();
238 while (true) {
239
240 typea = super.nextToken();
241 svala = sval;
242
243 if (typea == '\\') {
244 int typeb = super.nextToken();
245 if (typeb == '\\') {
246 quote.append((char) typeb);
247 continue;
248 }
249 if (typeb == '\n')
250
251 continue;
252 if (typeb == '\r') {
253 int typec = super.nextToken();
254 if (typec == '\n')
255 continue;
256
257 pushBack();
258 continue;
259 }
260 pushBack();
261 }
262
263 if (typea == qType) {
264 int typeb = super.nextToken();
265 if (typeb == qType) {
266 quote.append((char) qType);
267 continue;
268 } else {
269 pushBack();
270 break;
271 }
272 }
273 if (typea == '\n' || typea == '\r')
274 throw new InvalidTermException("line break in quote not allowed (unless they are escaped \\ first)");
275
276 if (svala != null)
277 quote.append(svala);
278 else
279 quote.append((char) typea);
280 }
281
282 String quoteBody = quote.toString();
283
284 qType = qType == '\'' ? Token.SQ_SEQUENCE : qType == '\"' ? Token.DQ_SEQUENCE : Token.SQ_SEQUENCE;
285 if (qType == Token.SQ_SEQUENCE) {
286 if (Parser.isAtom(quoteBody))
287 qType = Token.ATOM;
288 int typeb = super.nextToken();
289
290 pushBack();
291
292 if (typeb == '(')
293 return new Token(quoteBody, Token.SQ_FUNCTOR);
294 }
295 return new Token(quoteBody, qType);
296 }
297
298
299 if (Arrays.binarySearch(Tokenizer.GRAPHIC_CHARS, (char) typea) >= 0) {
300
301
302
303 StringBuffer symbols = new StringBuffer();
304 int typeb = typea;
305
306 while (Arrays.binarySearch(Tokenizer.GRAPHIC_CHARS, (char) typeb) >= 0) {
307 symbols.append((char) typeb);
308 typeb = super.nextToken();
309
310 }
311 pushBack();
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330 if (typeb == '(')
331 return new Token(symbols.toString(), Token.OPERATOR_FUNCTOR);
332 return new Token(symbols.toString(), Token.OPERATOR);
333 }
334
335
336 if (isNumber) {
337 try {
338
339
340
341 if (svala.startsWith("0")) {
342 if (svala.indexOf('b') == 1)
343 return new Token("" + java.lang.Long.parseLong(svala.substring(2), 2), Token.INTEGER);
344
345 if (svala.indexOf('o') == 1)
346 return new Token("" + java.lang.Long.parseLong(svala.substring(2), 8), Token.INTEGER);
347
348 if (svala.indexOf('x') == 1)
349 return new Token("" + java.lang.Long.parseLong(svala.substring(2), 16), Token.INTEGER);
350
351 }
352
353
354 int typeb = super.nextToken();
355 String svalb = sval;
356
357
358 if (typeb != '.' && typeb != '\'') {
359
360 pushBack();
361 return new Token("" + java.lang.Long.parseLong(svala), Token.INTEGER);
362 }
363
364
365 if (typeb == '\'' && "0".equals(svala)) {
366 int typec = super.nextToken();
367 String svalc = sval;
368 int intVal;
369 if ((intVal = isCharacterCodeConstantToken(typec, svalc)) != -1)
370 return new Token("" + intVal, Token.INTEGER);
371
372
373 throw new InvalidTermException("Character code constant starting with 0'<X> at line: " + super.lineno() + " cannot be recognized.");
374 }
375
376
377
378 java.lang.Long.parseLong(svala);
379
380
381 if (typeb != '.')
382 throw new InvalidTermException("A number starting with 0-9 cannot be rcognized as an int and does not have a fraction '.' at line: " + super.lineno());
383
384
385 int typec = super.nextToken();
386 String svalc = sval;
387
388
389
390 if (typec != TT_WORD) {
391
392 pushBack();
393 pushBack2 = new PushBack(typeb, svalb);
394
395 return new Token(svala, Token.INTEGER);
396
397 }
398
399
400 int exponent = svalc.indexOf("E");
401 if (exponent == -1)
402 exponent = svalc.indexOf("e");
403
404 if (exponent >= 1) {
405 if (exponent == svalc.length() - 1) {
406
407
408 int typeb2 = super.nextToken();
409 if (typeb2 == '+' || typeb2 == '-') {
410 int typec2 = super.nextToken();
411 String svalc2 = sval;
412 if (typec2 == TT_WORD) {
413
414
415 java.lang.Long.parseLong(svalc.substring(0, exponent));
416 java.lang.Integer.parseInt(svalc2);
417 return new Token(svala + "." + svalc + (char) typeb2 + svalc2, Token.FLOAT);
418 }
419 }
420 }
421 }
422
423
424 java.lang.Double.parseDouble(svala + "." + svalc);
425 return new Token(svala + "." + svalc, Token.FLOAT);
426
427 } catch (NumberFormatException e) {
428
429 throw new InvalidTermException("A term starting with 0-9 cannot be parsed as a number at line: " + lineno());
430 }
431 }
432 throw new InvalidTermException("Unknown Unicode character: " + typea + " (" + svala + ")");
433 }
434
435
436
437
438
439
440
441
442
443 private static int isCharacterCodeConstantToken(int typec, String svalc) {
444 if (svalc != null) {
445 if (svalc.length() == 1)
446 return (int) svalc.charAt(0);
447 if (svalc.length() > 1) {
448
449
450
451
452
453 return -1;
454 }
455 }
456 if (typec == ' ' ||
457 typec == '(' || typec == ')' || typec == '{' || typec == '}' || typec == '[' || typec == ']' || Arrays.binarySearch(GRAPHIC_CHARS, (char) typec) >= 0)
458
459
460
461
462
463 return typec;
464
465 return -1;
466 }
467
468 private static boolean isWhite(int type) {
469 return type == ' ' || type == '\r' || type == '\n' || type == '\t' || type == '\f';
470 }
471
472
473
474
475
476 private static class PushBack {
477 int typea;
478 String svala;
479
480 public PushBack(int i, String s) {
481 typea = i;
482 svala = s;
483 }
484 }
485 }