View Javadoc

1   /*
2    * #%L
3    * prolobjectlink-jpi-jtrolog
4    * %%
5    * Copyright (C) 2012 - 2018 WorkLogic Project
6    * %%
7    * This program is free software: you can redistribute it and/or modify
8    * it under the terms of the GNU Lesser General Public License as
9    * published by the Free Software Foundation, either version 2.1 of the
10   * License, or (at your option) any later version.
11   * 
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Lesser Public License for more details.
16   * 
17   * You should have received a copy of the GNU General Lesser Public
18   * License along with this program.  If not, see
19   * <http://www.gnu.org/licenses/lgpl-2.1.html>.
20   * #L%
21   */
22  package jTrolog.parser;
23  
24  import jTrolog.engine.Prolog;
25  import jTrolog.errors.InvalidTermException;
26  import jTrolog.terms.Double;
27  import jTrolog.terms.Int;
28  import jTrolog.terms.Number;
29  import jTrolog.terms.Struct;
30  import jTrolog.terms.StructAtom;
31  import jTrolog.terms.Term;
32  import jTrolog.terms.Var;
33  
34  import java.io.BufferedReader;
35  import java.io.IOException;
36  import java.io.InputStream;
37  import java.io.InputStreamReader;
38  import java.io.Serializable;
39  import java.util.ArrayList;
40  import java.util.Iterator;
41  import java.util.LinkedList;
42  import java.util.List;
43  import java.util.regex.Pattern;
44  
45  /**
46   * This class defines a parser of prolog terms and sentences.
47   * BNF for jTrolog part 2: Parser term ::= expr(1200) expr(n) ::= exprC(n-1) {
48   * op(xfx,n) expr(n-1) | op(xfy,n) expr(n) | op(xf,n) | op(yfx,n) expr(n-1) |
49   * op(yf,n) }* // exprC is called parseLeftSide in the code exprC(n) ::= '-'
50   * integer | '-' float | op( fx,n ) exprA(n-1) | op( fy,n ) exprA(n) | exprA(n)
51   * exprA(0) ::= integer | float | atom | variable | atom'(' expr(1200) { ','
52   * expr(1200) }* ')' | '[' [ expr(1200) { ',' expr(1200) }* [ '|' expr(1200) ] ]
53   * ']' | '(' { expr(1200) }* ')' '{' { expr(1200) }* '}' op(type,n) ::= atom | {
54   * symbol }+
55   */
56  @SuppressWarnings({ "rawtypes", "unchecked","serial" })
57  public class Parser implements Serializable {
58  	int dqFlag = 0;
59  	private static final int DQ_ATOMS = 0;
60  	private static final int DQ_CHARS = 1;
61  	private static final int DQ_CODES = 2;
62  
63  	public static final String floatSignature = "float/1".intern();
64  	public static final String listSignature = "'.'/2".intern();
65  	public static final String commaSignature = "','/2".intern();
66  	public static final String cutSignature = "!/0".intern();
67  	public static final String singleClauseSignature = "':-'/1".intern();
68  	public static final String doubleClauseSignature = "':-'/2".intern();
69  	public static final String semiColonSignature = "';'/2".intern();
70  	public static final String ifSignature = "'->'/2".intern();
71  	public static final String callSignature = "call/1".intern();
72  	public static final String throwSignature = "throw/1".intern();
73  	public static final String catchSignature = "catch/3".intern();
74  	public static final String trueSignature = "true/0".intern();
75  	public static final String failSignature = "fail/0".intern();
76  
77  	private Tokenizer tokenizer;
78  	private Prolog engine;
79  	private List variableList;
80  
81  	/**
82  	 * creating a Parser specifing how to handle operators and what text to
83  	 * parse
84  	 */
85  	public Parser(InputStream theoryText, Prolog p) {
86  		this(p, new Tokenizer(new BufferedReader(new InputStreamReader(theoryText))));
87  	}
88  
89  	/**
90  	 * creating a Parser specifing how to handle operators and what text to
91  	 * parse
92  	 */
93  	public Parser(String theoryText, Prolog engine) {
94  		this(engine, new Tokenizer(theoryText));
95  	}
96  
97  	/**
98  	 * creating a parser with default operator interpretation
99  	 */
100 	public Parser(String theoryText) {
101 		this(null, new Tokenizer(theoryText));
102 	}
103 
104 	/**
105 	 * creating a parser with default operator interpretation
106 	 */
107 	public Parser(InputStream theoryText) {
108 		this(null, new Tokenizer(new BufferedReader(new InputStreamReader(theoryText))));
109 	}
110 
111 	private Parser(Prolog p, Tokenizer lexer) {
112 		tokenizer = lexer;
113 		variableList = new ArrayList();
114 		if (p == null) {
115 			engine = Prolog.defaultMachine;
116 		} else {
117 			engine = p;
118 			Term dqFlag = p.getFlagValue("double_quotes");
119 			if (dqFlag != null) {
120 				if ("chars".equals(dqFlag.toString()))
121 					this.dqFlag = DQ_CHARS;
122 				else if ("codes".equals(dqFlag.toString()))
123 					this.dqFlag = DQ_CODES;
124 				else
125 					this.dqFlag = DQ_ATOMS;
126 			}
127 		}
128 	}
129 
130 	// user interface
131 
132 	public Iterator iterator() throws InvalidTermException {
133 		return new TermIterator(this);
134 	}
135 
136 	/**
137 	 * Parses next term from the stream built on string.
138 	 * 
139 	 * @param endNeeded
140 	 *            <tt>true</tt> if it is required to parse the end token (a
141 	 *            period), <tt>false</tt> otherwise.
142 	 * @throws InvalidTermException
143 	 *             if a syntax error is found.
144 	 */
145 	public Term nextTerm(boolean endNeeded) throws InvalidTermException {
146 		try {
147 			variableList.clear();
148 			Token t = tokenizer.readToken();
149 			if (t.isEOF())
150 				return null;
151 
152 			tokenizer.unreadToken(t);
153 			Term term = expr(Prolog.OP_HIGH, false);
154 			if (term == null)
155 				throw new InvalidTermException("The parser is unable to finish.");
156 
157 			if (endNeeded && !tokenizer.readToken().isType('.'))
158 				throw new InvalidTermException("The term " + term + " is not ended with a period.");
159 			return term;
160 		} catch (IOException ex) {
161 			throw new InvalidTermException("An I/O error occured.");
162 		}
163 	}
164 
165 	// internal parsing procedures
166 
167 	private Term expr(int maxPriority, boolean commaIsEndMarker) throws InvalidTermException, IOException {
168 
169 		// 1. op(fx,n) expr(n-1) | op(fy,n) exprA(n) | expr0
170 		Term leftRes = parseLeftSide(commaIsEndMarker, maxPriority);
171 		// todo should minPriority come from parseLeftSide??
172 		int minPriority = 0;
173 
174 		// 2.left is followed by either xfx, xfy or xf operators, parse these
175 		Token operator = tokenizer.readToken();
176 		for (; operator.isOperator(commaIsEndMarker); operator = tokenizer.readToken()) {
177 			int XFX = engine.getOperatorPriority(operator.seq, Prolog.XFX);
178 			int XFY = engine.getOperatorPriority(operator.seq, Prolog.XFY);
179 			int XF = engine.getOperatorPriority(operator.seq, Prolog.XF);
180 			int YFX = engine.getOperatorPriority(operator.seq, Prolog.YFX);
181 			int YF = engine.getOperatorPriority(operator.seq, Prolog.YF);
182 
183 			// check that no operator has a priority higher than permitted
184 			// or a lower priority than the left side expression
185 			if (XFX > maxPriority || XFX < Prolog.OP_LOW)
186 				XFX = -1;
187 			if (XFY > maxPriority || XFY < Prolog.OP_LOW)
188 				XFY = -1;
189 			if (XF > maxPriority || XF < Prolog.OP_LOW)
190 				XF = -1;
191 			if (YF < minPriority || YF > maxPriority)
192 				YF = -1;
193 			if (YFX < minPriority || YFX > maxPriority)
194 				YFX = -1;
195 
196 			// XFX
197 			if (XFX >= XFY && XFX >= XF && XFX >= minPriority) { // XFX has
198 																	// priority
199 				Term found = expr(XFX - 1, commaIsEndMarker);
200 				if (found != null) {
201 					minPriority = XFX;
202 					leftRes = new Struct(operator.seq, new Term[] { leftRes, found }, Prolog.XFX);
203 					continue;
204 				}
205 			}
206 			// XFY
207 			else if (XFY >= XF && XFY >= minPriority) { // XFY has priority, or
208 														// XFX has failed
209 				Term found = expr(XFY, commaIsEndMarker);
210 				if (found != null) {
211 					minPriority = XFY;
212 					leftRes = new Struct(operator.seq, new Term[] { leftRes, found }, Prolog.XFY);
213 					continue;
214 				}
215 			}
216 			// XF
217 			else if (XF >= minPriority) // XF has priority, or XFX and/or XFY
218 										// has failed
219 				return new Struct(operator.seq, new Term[] { leftRes }, Prolog.XF);
220 
221 			// XFX did not have top priority, but XFY failed
222 			else if (XFX >= minPriority) {
223 				Term found = expr(XFX - 1, commaIsEndMarker);
224 				if (found != null) {
225 					minPriority = XFX;
226 					leftRes = new Struct(operator.seq, new Term[] { leftRes, found }, Prolog.XFX);
227 					continue;
228 				}
229 			}
230 			// YFX
231 			else if (YFX >= YF && YFX >= Prolog.OP_LOW) {
232 				Term found = expr(YFX - 1, commaIsEndMarker);
233 				if (found != null) {
234 					minPriority = YFX;
235 					leftRes = new Struct(operator.seq, new Term[] { leftRes, found }, Prolog.YFX);
236 					continue;
237 				}
238 			}
239 			// either YF has priority over YFX or YFX failed
240 			else if (YF >= Prolog.OP_LOW) {
241 				minPriority = YF;
242 				leftRes = new Struct(operator.seq, new Term[] { leftRes }, Prolog.YF);
243 				continue;
244 			}
245 			break;
246 		}
247 		tokenizer.unreadToken(operator);
248 		return leftRes;
249 	}
250 
251 	/**
252 	 * Parses and returns a valid 'leftside' of an expression. If the left side
253 	 * starts with a prefix, it consumes other expressions with a lower priority
254 	 * than itself. If the left side does not have a prefix it must be an expr0.
255 	 * 
256 	 * @param commaIsEndMarker
257 	 *            used when the leftside is part of and argument list of
258 	 *            expressions
259 	 * @param maxPriority
260 	 *            operators with a higher priority than this will effectivly end
261 	 *            the expression
262 	 * @return a wrapper of: 1. term correctly structured and 2. the priority of
263 	 *         its root operator
264 	 * @throws InvalidTermException
265 	 */
266 	private Term parseLeftSide(boolean commaIsEndMarker, int maxPriority) throws InvalidTermException, IOException {
267 		// 1. prefix expression
268 		Token f = tokenizer.readToken();
269 		if (f.isOperator(commaIsEndMarker) && !f.isFunctor()) {
270 			int FX = engine.getOperatorPriority(f.seq, Prolog.FX);
271 			int FY = engine.getOperatorPriority(f.seq, Prolog.FY);
272 
273 			if (f.seq.equals("-")) {
274 				Token t = tokenizer.readToken();
275 				if (t.isNumber())
276 					return jTrolog.terms.Number.create("-" + t.seq);
277 				else
278 					tokenizer.unreadToken(t);
279 			}
280 
281 			// check that no operator has a priority higher than permitted
282 			if (FY > maxPriority)
283 				FY = -1;
284 			if (FX > maxPriority)
285 				FX = -1;
286 
287 			// FX has priority over FY
288 			if (FX >= FY && FX >= Prolog.OP_LOW) {
289 				Term found = expr(FX - 1, commaIsEndMarker); // op(fx, n)
290 																// exprA(n - 1)
291 				if (found != null)
292 					return new Struct(f.seq, new Term[] { found }, Prolog.FX);
293 			}
294 			// FY has priority over FX, or FX has failed
295 			else if (FY >= Prolog.OP_LOW) {
296 				Term found = expr(FY, commaIsEndMarker); // op(fy,n) exprA(1200)
297 															// or op(fy,n)
298 															// expr(n)
299 				if (found != null)
300 					return new Struct(f.seq, new Term[] { found }, Prolog.FY);
301 			}
302 			// FY has priority over FX, but FY failed
303 			else if (FX >= Prolog.OP_LOW) {
304 				Term found = expr(FX - 1, commaIsEndMarker); // op(fx, n)
305 																// exprA(n - 1)
306 				if (found != null)
307 					return new Struct(f.seq, new Term[] { found }, Prolog.FX);
308 			}
309 		}
310 		tokenizer.unreadToken(f);
311 		// 2. expr0
312 		return expr0();
313 	}
314 
315 	/**
316 	 * exprA(0) ::= integer | float | variable | atom | atom( expr(1200) { ,
317 	 * exprA(1200) }* ) | '[' expr(1200) { , expr(1200) }* [ | exprA(1200) ] ']'
318 	 * | '{' [ exprA(1200) ] '}' | '(' exprA(1200) ')'
319 	 */
320 	private Term expr0() throws InvalidTermException, IOException {
321 		Token t1 = tokenizer.readToken();
322 
323 		if (t1.isType(Token.INTEGER))
324 			return Int.create(t1.seq);
325 
326 		if (t1.isType(Token.FLOAT))
327 			return new Double(t1.seq);
328 
329 		if (t1.isType(Token.VARIABLE)) {
330 			int pos = variableList.indexOf(t1.seq);
331 			if (pos != -1 && t1.seq != Var.ANY)
332 				return new Var(t1.seq, pos + 1);
333 			variableList.add(t1.seq);
334 			return new Var(t1.seq, variableList.size());
335 		}
336 
337 		if (t1.isFunctor()) {
338 			String functor = t1.seq;
339 			Token t = tokenizer.readToken(); // reading left par
340 			LinkedList l = new LinkedList();
341 			do {
342 				l.add(expr(Prolog.OP_HIGH, true)); // adding argument
343 				t = tokenizer.readToken();
344 				if (")".equals(t.seq)) // if right par, return
345 					return new Struct(functor, (Term[]) l.toArray(new Term[0]));
346 			} while (",".equals(t.seq)); // if comma, read next arg
347 			throw new InvalidTermException("Error in argument list syntax.\n" + "Token: " + t + " not expected at line " + tokenizer.lineno() + ".");
348 		}
349 
350 		if (t1.isType(Token.DQ_SEQUENCE)) {
351 			if (dqFlag == Parser.DQ_ATOMS) // DQ as atoms
352 				return new StructAtom(t1.seq);
353 			if (dqFlag == Parser.DQ_CHARS) // DQ as char list
354 				return stringToStructList(t1.seq);
355 			if (dqFlag == Parser.DQ_CODES) { // DQ as int list
356 				char[] chars = t1.seq.toCharArray();
357 				int[] codes = new int[chars.length];
358 				for (int i = 0; i < chars.length; i++)
359 					codes[i] = chars[i];
360 				return intsToStructList(codes);
361 			}
362 		}
363 
364 		if (t1.isAtom())
365 			return new StructAtom(t1.seq);
366 
367 		// todo review handling of ( ... ). Error in test set and it should have
368 		// consequences for how toString in Struct performs for operators that
369 		// have strange priorities
370 		if (t1.isType('(')) {
371 			Term term = expr(Prolog.OP_HIGH, false);
372 			if (tokenizer.readToken().isType(')'))
373 				return term;
374 			throw new InvalidTermException("Missing right parenthesis: (" + term + " -> here <-");
375 		}
376 
377 		if (t1.isType('[')) {
378 			Token t2 = tokenizer.readToken();
379 			if (t2.isType(']'))
380 				return Term.emptyList;
381 			tokenizer.unreadToken(t2);
382 
383 			LinkedList elems = new LinkedList();
384 			do {
385 				elems.add(expr(Prolog.OP_HIGH, true));
386 				t2 = tokenizer.readToken();
387 				if ("|".equals(t2.seq)) {
388 					elems.add(expr(Prolog.OP_HIGH, true));
389 					t2 = tokenizer.readToken();
390 					if ("]".equals(t2.seq))
391 						return createStructList(elems);
392 					throw new InvalidTermException("Missing ']' after: " + elems.getLast());
393 				}
394 				if ("]".equals(t2.seq)) {
395 					elems.add(Term.emptyList);
396 					return createStructList(elems);
397 				}
398 			} while (",".equals(t2.seq));
399 			throw new InvalidTermException("Error in list syntax after: " + elems.getLast());
400 		}
401 
402 		if (t1.isType('{')) {
403 			Token t2 = tokenizer.readToken();
404 			if (t2.isType('}'))
405 				return new StructAtom("{}");
406 
407 			tokenizer.unreadToken(t2);
408 			Term arg = expr(Prolog.OP_HIGH, false);
409 			t2 = tokenizer.readToken();
410 			if (t2.isType('}'))
411 				return new Struct("{}", new Term[] { arg });
412 			throw new InvalidTermException("Missing right braces: {" + arg + " -> here <-");
413 		}
414 
415 		throw new InvalidTermException("The following token could not be identified: " + t1.seq);
416 	}
417 
418 	public int getCurrentLine() {
419 		return tokenizer.lineno();
420 	}
421 
422 	public static Struct createListContainingAnyVars(int lengthInt) {
423 		LinkedList vars = new LinkedList();
424 		for (int i = 0; i < lengthInt; i++)
425 			vars.add(new Var("_", i + 1));
426 		vars.add(Term.emptyList);
427 		return createStructList(vars);
428 	}
429 
430 	public static Struct createStructList(LinkedList complete) {
431 		if (complete.isEmpty())
432 			return Term.emptyList;
433 		if (complete.size() == 2)
434 			return new Struct(".", new Term[] { (Term) complete.getFirst(), (Term) complete.getLast() });
435 		if (complete.size() > 2) {
436 			Term head = (Term) complete.removeFirst();
437 			return new Struct(".", new Term[] { head, createStructList(complete) });
438 		}
439 		throw new RuntimeException("omg-..");
440 	}
441 
442 	public static Struct stringToStructList(String charList) {
443 		Struct t = StructAtom.emptyList;
444 		for (int i = charList.length() - 1; i >= 0; i--)
445 			t = new Struct(".", new Term[] { new StructAtom(Character.toString(charList.charAt(i))), t });
446 		return t;
447 	}
448 
449 	public static Struct intsToStructList(int[] numbers) {
450 		Struct t = StructAtom.emptyList;
451 		for (int i = numbers.length - 1; i >= 0; i--)
452 			t = new Struct(".", new Term[] { new Int(numbers[i]), t });
453 		return t;
454 	}
455 
456 	public static boolean isSemiAndNotIf(Struct struct) {
457 		if (struct == null || struct.predicateIndicator != semiColonSignature)
458 			return false;
459 		final Term left = struct.getArg(0);
460 		return !(left instanceof Struct) || ((Struct) left).predicateIndicator != ifSignature;
461 	}
462 
463 	/**
464 	 * @param atom
465 	 * @return atom wrapped in 'atom' if necessary
466 	 */
467 	public static String wrapAtom(final String atom) {
468 		return isAtom(atom) || (atom.startsWith("'") && atom.endsWith("'")) ? atom : "'" + atom + "'";
469 	}
470 
471 	/**
472 	 * @return true if the String could be a prolog atom
473 	 */
474 	public static boolean isAtom(String s) {
475 		return atom.matcher(s).matches();
476 	}
477 
478 	static private Pattern atom = Pattern.compile("(!|[a-z][a-zA-Z_0-9]*)");
479 
480 	public static Number parseNumber(String s) throws InvalidTermException {
481 		Term t = new Parser(s).nextTerm(false);
482 		if (t instanceof Number)
483 			return (Number) t;
484 		throw new InvalidTermException("Term " + t + " is not a number.");
485 	}
486 
487 	public static String removeApices(String st) {
488 		if (st.startsWith("'") && st.endsWith("'") && st.length() > 2)
489 			return st.substring(1, st.length() - 1);
490 		return st;
491 	}
492 }