From 0b49081c1ec1c30442e40f4700fed742c52f1015 Mon Sep 17 00:00:00 2001 From: Redempt Date: Thu, 5 Oct 2023 21:14:20 -0400 Subject: [PATCH] Finish initial parser rewrite (untested) --- src/redempt/crunch/ExpressionCompiler.java | 298 ++++-------------- src/redempt/crunch/ExpressionParser.java | 185 +++++++++++ src/redempt/crunch/Parser.java | 59 ---- src/redempt/crunch/ShuntingYard.java | 40 +++ src/redempt/crunch/data/CharTree.java | 4 +- .../ExpressionCompilationException.java | 8 +- 6 files changed, 286 insertions(+), 308 deletions(-) create mode 100644 src/redempt/crunch/ExpressionParser.java delete mode 100644 src/redempt/crunch/Parser.java create mode 100644 src/redempt/crunch/ShuntingYard.java diff --git a/src/redempt/crunch/ExpressionCompiler.java b/src/redempt/crunch/ExpressionCompiler.java index 2ace0f1..8c9efb1 100644 --- a/src/redempt/crunch/ExpressionCompiler.java +++ b/src/redempt/crunch/ExpressionCompiler.java @@ -1,10 +1,6 @@ package redempt.crunch; -import redempt.crunch.data.CharTree; import redempt.crunch.data.FastNumberParsing; -import redempt.crunch.data.Pair; -import redempt.crunch.data.TokenList; -import redempt.crunch.data.TokenList.Node; import redempt.crunch.exceptions.ExpressionCompilationException; import redempt.crunch.functional.ArgumentList; import redempt.crunch.functional.ExpressionEnv; @@ -12,10 +8,6 @@ import redempt.crunch.functional.FunctionCall; import redempt.crunch.token.*; -import java.util.ArrayList; -import java.util.List; -import java.util.StringJoiner; - class ExpressionCompiler { private static final char VAR_CHAR = '$'; @@ -24,53 +16,86 @@ static CompiledExpression compile(String expression, ExpressionEnv env) { if (expression == null || env == null) { throw new ExpressionCompilationException(null, "Expression and environment may not be null"); } - CompiledExpression exp = new CompiledExpression(); - Value val = compileValue(expression, exp, env, 0, false).getFirst(); - exp.setValue(val); - return exp; + ExpressionParser parser = new ExpressionParser(expression, env); + return parser.parse(); } - private static Value parseExpression(Parser parser, ExpressionEnv env) { - List tokens = new ArrayList<>(); - - return null; + private static Value parseExpression(ExpressionParser parser, ExpressionEnv env) { + ShuntingYard tokens = new ShuntingYard(); + tokens.addValue(parseTerm(parser, env)); + parser.whitespace(); + while (!parser.isAtEnd() && parser.peek() != ')') { + Token token = env.getNamedTokens().getWith(parser); + if (!(token instanceof BinaryOperator)) { + throw new ExpressionCompilationException(parser, "Expected binary operator"); + } + tokens.addOperator((BinaryOperator) token); + parser.whitespace(); + tokens.addValue(parseTerm(parser, env)); + } + return tokens.finish(); } - private static Value parseOptionalNestedExpression(Parser parser, ExpressionEnv env) { - if (parser.peek() != '(') { - return null; - } - parser.advanceCursor(); + private static Value parseNestedExpression(ExpressionParser parser, ExpressionEnv env) { + parser.expectChar('('); + parser.whitespace(); Value expression = parseExpression(parser, env); parser.expectChar(')'); return expression; } - private static Value parseTerm(Parser parser, ExpressionEnv env) { - Value nested = parseOptionalNestedExpression(parser, env); - if (nested != null) { - return nested; + private static Value parseTerm(ExpressionParser parser, ExpressionEnv env) { + switch (parser.peek()) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '.': + return parseLiteral(parser); + case '(': + return parseNestedExpression(parser, env); } Token token = env.getNamedTokens().getWith(parser); + if (token == null) { + throw new ExpressionCompilationException(parser, "Expected value"); + } if (token instanceof Value) { return (Value) token; } return parseLeadingOperation(parser, env, token); } - private static Value parseLeadingOperation(Parser parser, ExpressionEnv env, Token operation) { - switch (operation.getType()) { + private static LiteralValue parseLiteral(ExpressionParser parser) { + int start = parser.cur; + char c; + while (Character.isDigit(c = parser.peek()) || c == '.') { + parser.advanceCursor(); + } + return new LiteralValue(FastNumberParsing.parseInt(parser.str, start, parser.cur)); + } + + private static Value parseLeadingOperation(ExpressionParser parser, ExpressionEnv env, Token token) { + if (token instanceof Value) { + return (Value) token; + } + switch (token.getType()) { case UNARY_OPERATOR: - return new UnaryOperation((UnaryOperator) operation, parseTerm(parser, env)); + return new UnaryOperation((UnaryOperator) token, parseTerm(parser, env)); case FUNCTION: - Function function = (Function) operation; + Function function = (Function) token; ArgumentList args = parseArgumentList(parser, env, function.getArgCount()); return new FunctionCall(function, args.getArguments()); } throw new ExpressionCompilationException(parser, "Expected leading operation"); } - private static ArgumentList parseArgumentList(Parser parser, ExpressionEnv env, int args) { + private static ArgumentList parseArgumentList(ExpressionParser parser, ExpressionEnv env, int args) { parser.expectChar('('); parser.whitespace(); Value[] values = new Value[args]; @@ -89,218 +114,5 @@ private static ArgumentList parseArgumentList(Parser parser, ExpressionEnv env, parser.expectChar(')'); return new ArgumentList(values); } - - private static Pair compileValue(String expression, CompiledExpression exp, ExpressionEnv env, int begin, boolean parenthetical) { - CharTree namedTokens = env.getNamedTokens(); - TokenList tokens = new TokenList(); - Pair firstOp = namedTokens.getFrom(expression, begin); - boolean op = firstOp.getFirst() != null && firstOp.getFirst().getType() == TokenType.BINARY_OPERATOR; - boolean closed = false; - int tokenStart = begin; - char[] chars = expression.toCharArray(); - int i; - loop: - for (i = begin; i < expression.length(); i++) { - char c = chars[i]; - switch (c) { - case '(': - if (tokens.size() > 0 && tokens.tail().token.getType() == TokenType.FUNCTION) { - Pair args = compileArgumentList(expression, exp, env, i + 1); - tokens.add(args.getFirst()); - i += args.getSecond(); - tokenStart = i; - op = true; - continue; - } - if (!op && tokenStart != i) { - tokens.add(compileToken(expression, tokenStart, i, exp)); - } - if (tokens.tail() != null && tokens.tail().token instanceof Value) { - tokens.add(BinaryOperator.MULTIPLY); - } - Pair inner = compileValue(expression, exp, env, i + 1, true); - i += inner.getSecond() + 1; - tokens.add(inner.getFirst()); - tokenStart = i; - op = true; - continue; - case ' ': - if (!op && tokenStart != i) { - tokens.add(compileToken(expression, tokenStart, i, exp)); - tokenStart = i + 1; - } else { - tokenStart++; - } - continue; - case ')': - case ',': - if (!parenthetical) { - throw new ExpressionCompilationException("Unbalanced parenthesis"); - } - closed = true; - break loop; - } - Pair namedToken = namedTokens.getFrom(expression, i); - if (namedToken.getFirst() != null) { - Token token = namedToken.getFirst(); - if (token.getType() == TokenType.VARIABLE) { - Variable var = ((Variable) token).getClone(); - var.expression = exp; - token = var; - } - if (!op && tokenStart != i) { - tokens.add(compileToken(expression, tokenStart, i, exp)); - } - if (!(token.getType() == TokenType.BINARY_OPERATOR && !((BinaryOperator) token).isUnary()) - && tokens.tail() != null && tokens.tail().token instanceof Value) { - tokens.add(BinaryOperator.MULTIPLY); - } - if (token == BinaryOperator.SUBTRACT && (tokens.size() == 0 || !(tokens.tail().token instanceof Value))) { - token = BinaryOperator.NEGATE; - } - op = token.getType() == TokenType.BINARY_OPERATOR; - i += namedToken.getSecond() - 1; - tokenStart = i + 1; - tokens.add(token); - continue; - } - op = false; - } - if (parenthetical && !closed) { - throw new ExpressionCompilationException("Unbalanced parenthesis"); - } - if (tokenStart < i && i <= expression.length() && !op) { - tokens.add(compileToken(expression, tokenStart, i, exp)); - } - return new Pair<>(reduceTokens(tokens), i - begin); - } - - private static Pair compileArgumentList(String expression, CompiledExpression exp, ExpressionEnv env, int start) { - List values = new ArrayList<>(); - int i = start; - loop: - while (i < expression.length() && expression.charAt(i) != ')') { - Pair result = compileValue(expression, exp, env, i, true); - i += result.getSecond() + 1; - values.add(result.getFirst()); - switch (expression.charAt(i - 1)) { - case ')': - break loop; - case ',': - break; - default: - throw new ExpressionCompilationException("Function argument lists must be separated by commas"); - } - } - if (values.size() == 0) { - i++; - } - if (expression.charAt(i - 1) != ')') { - throw new ExpressionCompilationException("Unbalanced parenthesis"); - } - Value[] valueArray = values.toArray(new Value[values.size()]); - return new Pair<>(new ArgumentList(valueArray), i - start); - } - - private static class OperatorList extends ArrayList {} - - private static Value reduceTokens(TokenList tokens) { - OperatorList[] priorities = new OperatorList[11]; - for (Node node = tokens.head(); node != null; node = node.next) { - Token token = node.token; - if (token.getType() == TokenType.FUNCTION) { - createFunctionCall(node); - continue; - } - if (token.getType() == TokenType.BINARY_OPERATOR) { - BinaryOperator op = (BinaryOperator) token; - OperatorList ops = priorities[op.getPriority()]; - if (ops == null) { - ops = new OperatorList(); - priorities[op.getPriority()] = ops; - } - ops.add(node); - } - } - for (int i = priorities.length - 1; i >= 0; i--) { - OperatorList list = priorities[i]; - if (list == null) { - continue; - } - list.forEach(ExpressionCompiler::createOperation); - } - Token token = tokens.head().token; - if (!(token instanceof Value)) { - throw new ExpressionCompilationException("Token is not a value: " + token.toString()); - } - if (tokens.size() > 1) { - StringJoiner joiner = new StringJoiner(", "); - tokens.forEach(t -> joiner.add(t.toString())); - throw new ExpressionCompilationException("Adjacent values have no operators between them: " + joiner.toString()); - } - return (Value) tokens.head().token; - } - - private static void createFunctionCall(Node node) { - if (node.next == null) { - throw new ExpressionCompilationException("Function must be followed by argument list"); - } - Token next = node.next.token; - if (next.getType() != TokenType.ARGUMENT_LIST) { - throw new ExpressionCompilationException("Function must be followed by argument list"); - } - Function func = (Function) node.token; - ArgumentList list = (ArgumentList) next; - if (list.getArguments().length != func.getArgCount()) { - throw new ExpressionCompilationException("Function '" + func.getName() + "' takes " + func.getArgCount() + " args, but got " + list.getArguments().length); - } - node.removeAfter(); - node.token = new FunctionCall(func, list.getArguments()); - } - - private static void createOperation(Node node) { - BinaryOperator op = (BinaryOperator) node.token; - if (node.next == null) { - throw new ExpressionCompilationException("Operator " + op + " has no following operand"); - } - if (op.isUnary()) { - Token next = node.next.token; - node.removeAfter(); - if (next.getType() == TokenType.BINARY_OPERATOR) { - throw new ExpressionCompilationException("Adjacent operators have no values to operate on"); - } - if (next.getType() == TokenType.LITERAL_VALUE && op.canInline()) { - Value literal = (Value) next; - node.token = new LiteralValue(op.operate(literal.getValue())); - return; - } - node.token = new BinaryOperation(op, (Value) next); - return; - } - if (node.prev == null) { - throw new ExpressionCompilationException("Operator " + op + " has no leading operand"); - } - Token next = node.next.token; - node.removeAfter(); - Token prev = node.prev.token; - node.removeBefore(); - if (prev.getType() == TokenType.BINARY_OPERATOR || next.getType() == TokenType.BINARY_OPERATOR) { - throw new ExpressionCompilationException("Adjacent operators have no values to operate on"); - } - if (prev.getType() == TokenType.LITERAL_VALUE && next.getType() == TokenType.LITERAL_VALUE && op.canInline()) { - Value lit1 = (Value) prev; - Value lit2 = (Value) next; - node.token = new LiteralValue(op.operate(lit1.getValue(), lit2.getValue())); - return; - } - node.token = new BinaryOperation(op, (Value) prev, (Value) next); - } - - private static Token compileToken(String str, int start, int end, CompiledExpression exp) { - if (str.charAt(start) == VAR_CHAR) { - return new Variable(exp, FastNumberParsing.parseInt(str, start + 1, end) - 1); - } - return new LiteralValue(FastNumberParsing.parseDouble(str, start, end)); - } } diff --git a/src/redempt/crunch/ExpressionParser.java b/src/redempt/crunch/ExpressionParser.java new file mode 100644 index 0000000..3f99757 --- /dev/null +++ b/src/redempt/crunch/ExpressionParser.java @@ -0,0 +1,185 @@ +package redempt.crunch; + +import redempt.crunch.data.CharTree; +import redempt.crunch.data.FastNumberParsing; +import redempt.crunch.data.Pair; +import redempt.crunch.exceptions.ExpressionCompilationException; +import redempt.crunch.functional.ArgumentList; +import redempt.crunch.functional.ExpressionEnv; +import redempt.crunch.functional.Function; +import redempt.crunch.functional.FunctionCall; +import redempt.crunch.token.*; + +public class ExpressionParser { + + public final String str; + public int cur = 0; + public final ExpressionEnv env; + private CompiledExpression expr = new CompiledExpression(); + + public ExpressionParser(String str, ExpressionEnv env) { + this.str = str; + this.env = env; + } + + public char peek() { + return str.charAt(cur); + } + + public char advance() { + return str.charAt(cur++); + } + + public void advanceCursor() { + cur++; + } + + public boolean isAtEnd() { + return cur >= str.length(); + } + + public void expectChar(char c) { + if (advance() != c) { + throw new ExpressionCompilationException(this, "Expected '" + c + "'"); + } + } + + private ExpressionCompilationException error(String msg) { + throw new ExpressionCompilationException(this, msg); + } + + public void whitespace() { + while (Character.isWhitespace(peek())) { + cur++; + } + } + + public boolean strMatches(String prefix, boolean advance) { + boolean matches = str.regionMatches(cur, prefix, 0, prefix.length()); + if (matches && advance) { + cur += prefix.length(); + } + return matches; + } + + public T getWith(CharTree tree) { + Pair result = tree.getFrom(str, cur); + T parsed = result.getFirst(); + if (parsed == null) { + return null; + } + int offset = result.getSecond(); + cur += offset; + return parsed; + } + + private Value parseExpression() { + Value first = parseTerm(); + if (isAtEnd() || peek() == ')') { + return first; + } + ShuntingYard tokens = new ShuntingYard(); + tokens.addValue(first); + whitespace(); + while (!isAtEnd() && peek() != ')') { + Token token = env.getNamedTokens().getWith(this); + if (!(token instanceof BinaryOperator)) { + error("Expected binary operator"); + } + tokens.addOperator((BinaryOperator) token); + whitespace(); + tokens.addValue(parseTerm()); + } + return tokens.finish(); + } + + private Value parseNestedExpression() { + expectChar('('); + whitespace(); + Value expression = parseExpression(); + expectChar(')'); + return expression; + } + + private Value parseTerm() { + switch (peek()) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '.': + return parseLiteral(); + case '(': + return parseNestedExpression(); + } + Token token = env.getNamedTokens().getWith(this); + if (token == null) { + error("Expected value"); + } + if (token instanceof Value) { + if (token instanceof Variable) { + ((Variable) token).expression = expr; + } + return (Value) token; + } + return parseLeadingOperation(token); + } + + private LiteralValue parseLiteral() { + int start = cur; + char c; + while (Character.isDigit(c = peek()) || c == '.') { + advanceCursor(); + } + return new LiteralValue(FastNumberParsing.parseInt(str, start, cur)); + } + + private Value parseLeadingOperation(Token token) { + if (token instanceof Value) { + return (Value) token; + } + switch (token.getType()) { + case UNARY_OPERATOR: + return new UnaryOperation((UnaryOperator) token, parseTerm()); + case FUNCTION: + Function function = (Function) token; + ArgumentList args = parseArgumentList(function.getArgCount()); + return new FunctionCall(function, args.getArguments()); + } + error("Expected leading operation"); + return null; + } + + private ArgumentList parseArgumentList(int args) { + expectChar('('); + whitespace(); + Value[] values = new Value[args]; + if (args == 0) { + expectChar(')'); + return new ArgumentList(new Value[0]); + } + values[0] = parseExpression(); + whitespace(); + for (int i = 1; i < args; i++) { + expectChar(','); + values[i] = parseExpression(); + whitespace(); + } + + expectChar(')'); + return new ArgumentList(values); + } + + public CompiledExpression parse() { + Value value = parseExpression(); + expr.setValue(value); + return expr; + } + +} \ No newline at end of file diff --git a/src/redempt/crunch/Parser.java b/src/redempt/crunch/Parser.java deleted file mode 100644 index 57a515b..0000000 --- a/src/redempt/crunch/Parser.java +++ /dev/null @@ -1,59 +0,0 @@ -package redempt.crunch; - -import redempt.crunch.data.CharTree; -import redempt.crunch.data.Pair; -import redempt.crunch.exceptions.ExpressionCompilationException; - -public class Parser { - - public final String str; - public int cur = 0; - - public Parser(String str) { - this.str = str; - } - - public char peek() { - return str.charAt(cur); - } - - public char advance() { - return str.charAt(cur++); - } - - public void advanceCursor() { - cur++; - } - - public void expectChar(char c) { - if (advance() != c) { - throw new ExpressionCompilationException(this, "Expected '" + c + "'"); - } - } - - public void whitespace() { - while (Character.isWhitespace(peek())) { - cur++; - } - } - - public boolean strMatches(String prefix, boolean advance) { - boolean matches = str.regionMatches(cur, prefix, 0, prefix.length()); - if (matches && advance) { - cur += prefix.length(); - } - return matches; - } - - public T getWith(CharTree tree) { - Pair result = tree.getFrom(str, cur); - T parsed = result.getFirst(); - if (parsed == null) { - return null; - } - int offset = result.getSecond(); - cur += offset; - return parsed; - } - -} \ No newline at end of file diff --git a/src/redempt/crunch/ShuntingYard.java b/src/redempt/crunch/ShuntingYard.java new file mode 100644 index 0000000..846799b --- /dev/null +++ b/src/redempt/crunch/ShuntingYard.java @@ -0,0 +1,40 @@ +package redempt.crunch; + +import redempt.crunch.token.BinaryOperation; +import redempt.crunch.token.BinaryOperator; +import redempt.crunch.token.Value; + +import java.util.ArrayDeque; +import java.util.Deque; + +public class ShuntingYard { + + private Deque operators = new ArrayDeque<>(); + private Deque stack = new ArrayDeque<>(); + + public void addOperator(BinaryOperator operator) { + while (!operators.isEmpty() && operator.priority <= operators.getLast().priority) { + createOperation(); + } + operators.add(operator); + } + + public void addValue(Value value) { + stack.add(value); + } + + private BinaryOperation createOperation() { + BinaryOperator op = operators.removeLast(); + Value right = stack.removeLast(); + Value left = stack.removeLast(); + return new BinaryOperation(op, left, right); + } + + public Value finish() { + while (stack.size() > 1) { + createOperation(); + } + return stack.removeLast(); + } + +} diff --git a/src/redempt/crunch/data/CharTree.java b/src/redempt/crunch/data/CharTree.java index 67afd78..68bbda1 100644 --- a/src/redempt/crunch/data/CharTree.java +++ b/src/redempt/crunch/data/CharTree.java @@ -1,6 +1,6 @@ package redempt.crunch.data; -import redempt.crunch.Parser; +import redempt.crunch.ExpressionParser; /** * A simple implementation of a prefix tree for better parsing @@ -70,7 +70,7 @@ public Pair getFrom(String str, int index) { return new Pair<>(val, str.length() - index); } - public T getWith(Parser parser) { + public T getWith(ExpressionParser parser) { Node node = root; T val = null; int lastParsed = parser.cur; diff --git a/src/redempt/crunch/exceptions/ExpressionCompilationException.java b/src/redempt/crunch/exceptions/ExpressionCompilationException.java index b8da31b..77418d5 100644 --- a/src/redempt/crunch/exceptions/ExpressionCompilationException.java +++ b/src/redempt/crunch/exceptions/ExpressionCompilationException.java @@ -1,17 +1,17 @@ package redempt.crunch.exceptions; -import redempt.crunch.Parser; +import redempt.crunch.ExpressionParser; public class ExpressionCompilationException extends RuntimeException { - private Parser parser; + private ExpressionParser parser; - public ExpressionCompilationException(Parser parser, String message) { + public ExpressionCompilationException(ExpressionParser parser, String message) { super(message); this.parser = parser; } - public Parser getParser() { + public ExpressionParser getParser() { return parser; }