1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package jTrolog.parser;
23
24 import jTrolog.engine.Prolog;
25 import jTrolog.errors.InvalidTermException;
26 import jTrolog.terms.Double;
27 import jTrolog.terms.Int;
28 import jTrolog.terms.Number;
29 import jTrolog.terms.Struct;
30 import jTrolog.terms.StructAtom;
31 import jTrolog.terms.Term;
32 import jTrolog.terms.Var;
33
34 import java.io.BufferedReader;
35 import java.io.IOException;
36 import java.io.InputStream;
37 import java.io.InputStreamReader;
38 import java.io.Serializable;
39 import java.util.ArrayList;
40 import java.util.Iterator;
41 import java.util.LinkedList;
42 import java.util.List;
43 import java.util.regex.Pattern;
44
45
46
47
48
49
50
51
52
53
54
55
56 @SuppressWarnings({ "rawtypes", "unchecked","serial" })
57 public class Parser implements Serializable {
58 int dqFlag = 0;
59 private static final int DQ_ATOMS = 0;
60 private static final int DQ_CHARS = 1;
61 private static final int DQ_CODES = 2;
62
63 public static final String floatSignature = "float/1".intern();
64 public static final String listSignature = "'.'/2".intern();
65 public static final String commaSignature = "','/2".intern();
66 public static final String cutSignature = "!/0".intern();
67 public static final String singleClauseSignature = "':-'/1".intern();
68 public static final String doubleClauseSignature = "':-'/2".intern();
69 public static final String semiColonSignature = "';'/2".intern();
70 public static final String ifSignature = "'->'/2".intern();
71 public static final String callSignature = "call/1".intern();
72 public static final String throwSignature = "throw/1".intern();
73 public static final String catchSignature = "catch/3".intern();
74 public static final String trueSignature = "true/0".intern();
75 public static final String failSignature = "fail/0".intern();
76
77 private Tokenizer tokenizer;
78 private Prolog engine;
79 private List variableList;
80
81
82
83
84
/**
 * Creates a parser that reads its text from a stream and works with the given engine.
 *
 * @param theoryText stream supplying the text to parse; it is wrapped in a
 *            buffered InputStreamReader created without an explicit charset
 * @param p engine to use; may be null, in which case the delegate
 *            constructor picks Prolog.defaultMachine
 */
public Parser(InputStream theoryText, Prolog p) {
    this(
            p,
            new Tokenizer(
                    new BufferedReader(
                            new InputStreamReader(theoryText))));
}
88
89
90
91
92
/**
 * Creates a parser over an in-memory text that works with the given engine.
 *
 * @param theoryText the text to parse
 * @param engine engine to use; may be null, in which case the delegate
 *            constructor picks Prolog.defaultMachine
 */
public Parser(String theoryText, Prolog engine) {
    this(
            engine,
            new Tokenizer(theoryText));
}
96
97
98
99
/**
 * Creates a parser over an in-memory text without naming an engine;
 * the delegate constructor then uses Prolog.defaultMachine.
 *
 * @param theoryText the text to parse
 */
public Parser(String theoryText) {
    this(
            null,
            new Tokenizer(theoryText));
}
103
104
105
106
/**
 * Creates a parser that reads its text from a stream without naming an engine;
 * the delegate constructor then uses Prolog.defaultMachine.
 *
 * @param theoryText stream supplying the text to parse; it is wrapped in a
 *            buffered InputStreamReader created without an explicit charset
 */
public Parser(InputStream theoryText) {
    this(
            null,
            new Tokenizer(
                    new BufferedReader(
                            new InputStreamReader(theoryText))));
}
110
111 private Parser(Prolog p, Tokenizer lexer) {
112 tokenizer = lexer;
113 variableList = new ArrayList();
114 if (p == null) {
115 engine = Prolog.defaultMachine;
116 } else {
117 engine = p;
118 Term dqFlag = p.getFlagValue("double_quotes");
119 if (dqFlag != null) {
120 if ("chars".equals(dqFlag.toString()))
121 this.dqFlag = DQ_CHARS;
122 else if ("codes".equals(dqFlag.toString()))
123 this.dqFlag = DQ_CODES;
124 else
125 this.dqFlag = DQ_ATOMS;
126 }
127 }
128 }
129
130
131
132 public Iterator iterator() throws InvalidTermException {
133 return new TermIterator(this);
134 }
135
136
137
138
139
140
141
142
143
144
145 public Term nextTerm(boolean endNeeded) throws InvalidTermException {
146 try {
147 variableList.clear();
148 Token t = tokenizer.readToken();
149 if (t.isEOF())
150 return null;
151
152 tokenizer.unreadToken(t);
153 Term term = expr(Prolog.OP_HIGH, false);
154 if (term == null)
155 throw new InvalidTermException("The parser is unable to finish.");
156
157 if (endNeeded && !tokenizer.readToken().isType('.'))
158 throw new InvalidTermException("The term " + term + " is not ended with a period.");
159 return term;
160 } catch (IOException ex) {
161 throw new InvalidTermException("An I/O error occured.");
162 }
163 }
164
165
166
167 private Term expr(int maxPriority, boolean commaIsEndMarker) throws InvalidTermException, IOException {
168
169
170 Term leftRes = parseLeftSide(commaIsEndMarker, maxPriority);
171
172 int minPriority = 0;
173
174
175 Token operator = tokenizer.readToken();
176 for (; operator.isOperator(commaIsEndMarker); operator = tokenizer.readToken()) {
177 int XFX = engine.getOperatorPriority(operator.seq, Prolog.XFX);
178 int XFY = engine.getOperatorPriority(operator.seq, Prolog.XFY);
179 int XF = engine.getOperatorPriority(operator.seq, Prolog.XF);
180 int YFX = engine.getOperatorPriority(operator.seq, Prolog.YFX);
181 int YF = engine.getOperatorPriority(operator.seq, Prolog.YF);
182
183
184
185 if (XFX > maxPriority || XFX < Prolog.OP_LOW)
186 XFX = -1;
187 if (XFY > maxPriority || XFY < Prolog.OP_LOW)
188 XFY = -1;
189 if (XF > maxPriority || XF < Prolog.OP_LOW)
190 XF = -1;
191 if (YF < minPriority || YF > maxPriority)
192 YF = -1;
193 if (YFX < minPriority || YFX > maxPriority)
194 YFX = -1;
195
196
197 if (XFX >= XFY && XFX >= XF && XFX >= minPriority) {
198
199 Term found = expr(XFX - 1, commaIsEndMarker);
200 if (found != null) {
201 minPriority = XFX;
202 leftRes = new Struct(operator.seq, new Term[] { leftRes, found }, Prolog.XFX);
203 continue;
204 }
205 }
206
207 else if (XFY >= XF && XFY >= minPriority) {
208
209 Term found = expr(XFY, commaIsEndMarker);
210 if (found != null) {
211 minPriority = XFY;
212 leftRes = new Struct(operator.seq, new Term[] { leftRes, found }, Prolog.XFY);
213 continue;
214 }
215 }
216
217 else if (XF >= minPriority)
218
219 return new Struct(operator.seq, new Term[] { leftRes }, Prolog.XF);
220
221
222 else if (XFX >= minPriority) {
223 Term found = expr(XFX - 1, commaIsEndMarker);
224 if (found != null) {
225 minPriority = XFX;
226 leftRes = new Struct(operator.seq, new Term[] { leftRes, found }, Prolog.XFX);
227 continue;
228 }
229 }
230
231 else if (YFX >= YF && YFX >= Prolog.OP_LOW) {
232 Term found = expr(YFX - 1, commaIsEndMarker);
233 if (found != null) {
234 minPriority = YFX;
235 leftRes = new Struct(operator.seq, new Term[] { leftRes, found }, Prolog.YFX);
236 continue;
237 }
238 }
239
240 else if (YF >= Prolog.OP_LOW) {
241 minPriority = YF;
242 leftRes = new Struct(operator.seq, new Term[] { leftRes }, Prolog.YF);
243 continue;
244 }
245 break;
246 }
247 tokenizer.unreadToken(operator);
248 return leftRes;
249 }
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266 private Term parseLeftSide(boolean commaIsEndMarker, int maxPriority) throws InvalidTermException, IOException {
267
268 Token f = tokenizer.readToken();
269 if (f.isOperator(commaIsEndMarker) && !f.isFunctor()) {
270 int FX = engine.getOperatorPriority(f.seq, Prolog.FX);
271 int FY = engine.getOperatorPriority(f.seq, Prolog.FY);
272
273 if (f.seq.equals("-")) {
274 Token t = tokenizer.readToken();
275 if (t.isNumber())
276 return jTrolog.terms.Number.create("-" + t.seq);
277 else
278 tokenizer.unreadToken(t);
279 }
280
281
282 if (FY > maxPriority)
283 FY = -1;
284 if (FX > maxPriority)
285 FX = -1;
286
287
288 if (FX >= FY && FX >= Prolog.OP_LOW) {
289 Term found = expr(FX - 1, commaIsEndMarker);
290
291 if (found != null)
292 return new Struct(f.seq, new Term[] { found }, Prolog.FX);
293 }
294
295 else if (FY >= Prolog.OP_LOW) {
296 Term found = expr(FY, commaIsEndMarker);
297
298
299 if (found != null)
300 return new Struct(f.seq, new Term[] { found }, Prolog.FY);
301 }
302
303 else if (FX >= Prolog.OP_LOW) {
304 Term found = expr(FX - 1, commaIsEndMarker);
305
306 if (found != null)
307 return new Struct(f.seq, new Term[] { found }, Prolog.FX);
308 }
309 }
310 tokenizer.unreadToken(f);
311
312 return expr0();
313 }
314
315
316
317
318
319
320 private Term expr0() throws InvalidTermException, IOException {
321 Token t1 = tokenizer.readToken();
322
323 if (t1.isType(Token.INTEGER))
324 return Int.create(t1.seq);
325
326 if (t1.isType(Token.FLOAT))
327 return new Double(t1.seq);
328
329 if (t1.isType(Token.VARIABLE)) {
330 int pos = variableList.indexOf(t1.seq);
331 if (pos != -1 && t1.seq != Var.ANY)
332 return new Var(t1.seq, pos + 1);
333 variableList.add(t1.seq);
334 return new Var(t1.seq, variableList.size());
335 }
336
337 if (t1.isFunctor()) {
338 String functor = t1.seq;
339 Token t = tokenizer.readToken();
340 LinkedList l = new LinkedList();
341 do {
342 l.add(expr(Prolog.OP_HIGH, true));
343 t = tokenizer.readToken();
344 if (")".equals(t.seq))
345 return new Struct(functor, (Term[]) l.toArray(new Term[0]));
346 } while (",".equals(t.seq));
347 throw new InvalidTermException("Error in argument list syntax.\n" + "Token: " + t + " not expected at line " + tokenizer.lineno() + ".");
348 }
349
350 if (t1.isType(Token.DQ_SEQUENCE)) {
351 if (dqFlag == Parser.DQ_ATOMS)
352 return new StructAtom(t1.seq);
353 if (dqFlag == Parser.DQ_CHARS)
354 return stringToStructList(t1.seq);
355 if (dqFlag == Parser.DQ_CODES) {
356 char[] chars = t1.seq.toCharArray();
357 int[] codes = new int[chars.length];
358 for (int i = 0; i < chars.length; i++)
359 codes[i] = chars[i];
360 return intsToStructList(codes);
361 }
362 }
363
364 if (t1.isAtom())
365 return new StructAtom(t1.seq);
366
367
368
369
370 if (t1.isType('(')) {
371 Term term = expr(Prolog.OP_HIGH, false);
372 if (tokenizer.readToken().isType(')'))
373 return term;
374 throw new InvalidTermException("Missing right parenthesis: (" + term + " -> here <-");
375 }
376
377 if (t1.isType('[')) {
378 Token t2 = tokenizer.readToken();
379 if (t2.isType(']'))
380 return Term.emptyList;
381 tokenizer.unreadToken(t2);
382
383 LinkedList elems = new LinkedList();
384 do {
385 elems.add(expr(Prolog.OP_HIGH, true));
386 t2 = tokenizer.readToken();
387 if ("|".equals(t2.seq)) {
388 elems.add(expr(Prolog.OP_HIGH, true));
389 t2 = tokenizer.readToken();
390 if ("]".equals(t2.seq))
391 return createStructList(elems);
392 throw new InvalidTermException("Missing ']' after: " + elems.getLast());
393 }
394 if ("]".equals(t2.seq)) {
395 elems.add(Term.emptyList);
396 return createStructList(elems);
397 }
398 } while (",".equals(t2.seq));
399 throw new InvalidTermException("Error in list syntax after: " + elems.getLast());
400 }
401
402 if (t1.isType('{')) {
403 Token t2 = tokenizer.readToken();
404 if (t2.isType('}'))
405 return new StructAtom("{}");
406
407 tokenizer.unreadToken(t2);
408 Term arg = expr(Prolog.OP_HIGH, false);
409 t2 = tokenizer.readToken();
410 if (t2.isType('}'))
411 return new Struct("{}", new Term[] { arg });
412 throw new InvalidTermException("Missing right braces: {" + arg + " -> here <-");
413 }
414
415 throw new InvalidTermException("The following token could not be identified: " + t1.seq);
416 }
417
418 public int getCurrentLine() {
419 return tokenizer.lineno();
420 }
421
422 public static Struct createListContainingAnyVars(int lengthInt) {
423 LinkedList vars = new LinkedList();
424 for (int i = 0; i < lengthInt; i++)
425 vars.add(new Var("_", i + 1));
426 vars.add(Term.emptyList);
427 return createStructList(vars);
428 }
429
430 public static Struct createStructList(LinkedList complete) {
431 if (complete.isEmpty())
432 return Term.emptyList;
433 if (complete.size() == 2)
434 return new Struct(".", new Term[] { (Term) complete.getFirst(), (Term) complete.getLast() });
435 if (complete.size() > 2) {
436 Term head = (Term) complete.removeFirst();
437 return new Struct(".", new Term[] { head, createStructList(complete) });
438 }
439 throw new RuntimeException("omg-..");
440 }
441
442 public static Struct stringToStructList(String charList) {
443 Struct t = StructAtom.emptyList;
444 for (int i = charList.length() - 1; i >= 0; i--)
445 t = new Struct(".", new Term[] { new StructAtom(Character.toString(charList.charAt(i))), t });
446 return t;
447 }
448
449 public static Struct intsToStructList(int[] numbers) {
450 Struct t = StructAtom.emptyList;
451 for (int i = numbers.length - 1; i >= 0; i--)
452 t = new Struct(".", new Term[] { new Int(numbers[i]), t });
453 return t;
454 }
455
456 public static boolean isSemiAndNotIf(Struct struct) {
457 if (struct == null || struct.predicateIndicator != semiColonSignature)
458 return false;
459 final Term left = struct.getArg(0);
460 return !(left instanceof Struct) || ((Struct) left).predicateIndicator != ifSignature;
461 }
462
463
464
465
466
467 public static String wrapAtom(final String atom) {
468 return isAtom(atom) || (atom.startsWith("'") && atom.endsWith("'")) ? atom : "'" + atom + "'";
469 }
470
471
472
473
474 public static boolean isAtom(String s) {
475 return atom.matcher(s).matches();
476 }
477
478 static private Pattern atom = Pattern.compile("(!|[a-z][a-zA-Z_0-9]*)");
479
480 public static Number parseNumber(String s) throws InvalidTermException {
481 Term t = new Parser(s).nextTerm(false);
482 if (t instanceof Number)
483 return (Number) t;
484 throw new InvalidTermException("Term " + t + " is not a number.");
485 }
486
487 public static String removeApices(String st) {
488 if (st.startsWith("'") && st.endsWith("'") && st.length() > 2)
489 return st.substring(1, st.length() - 1);
490 return st;
491 }
492 }