Equation Parsing in Python

Equation parsing in Python

Python's own internal compiler can parse this, if you use Python notation.

If you change the notation slightly, you'll be happier.

# NOTE(review): the `compiler` module only exists in Python 2; the ast-based
# answers further down show the Python 3 equivalent.
import compiler
eq= "sin(x)*x**2"
# Parse the equation text into an abstract syntax tree.
ast= compiler.parse( eq )

You get an abstract syntax tree that you can work with.

Parsing an equation with custom functions in Python

Here is a minimal working example (the binary and unary operators +, -, *, / and **, plus function calls, are implemented). The priority of operations is set with parentheses.

A little bit more than the functionality for the example given is done:

from __future__ import print_function
import ast

def transform(eq,functions):
    """Expand user-defined functions inside an equation and return it as text.

    eq is parsed as a Python expression. Every call to a name that is a key
    of functions is replaced by the body of the matching definition, with
    the formal parameters substituted by the arguments of the call. All
    other calls, names and literals are emitted unchanged. Each binary
    operation is wrapped in parentheses so the output does not depend on
    operator precedence.

    Parameters:
        eq: the equation, written in Python expression syntax.
        functions: dict mapping a function name to the source text of its
            definition, e.g. {"f1": "def f1(x,y):return x+y**2"}.

    Returns:
        The expanded equation as a string.

    Raises:
        SyntaxError: if eq or a definition that gets used is not valid Python.
    """
    pieces = []

    def generate(s):
        # the following line may be useful for debugging
        debug(str(s))
        pieces.append(str(s))

    class EqVisitor(ast.NodeVisitor):
        """Emit the source text of the visited expression, piece by piece."""

        def visit_BinOp(self,node):
            generate("(")
            self.visit(node.left)
            self.visit(node.op)
            self.visit(node.right)
            generate(")")

        # Unary operators: the enclosing UnaryOp node goes through
        # generic_visit, which visits the operator and then the operand.
        def visit_USub(self,node):
            generate("-")

        def visit_UAdd(self,node):
            generate("+")

        def visit_Sub(self,node):
            generate("-")

        def visit_Add(self,node):
            generate("+")

        def visit_Pow(self,node):
            generate("**")

        def visit_Mult(self,node):
            generate("*")

        def visit_Div(self,node):
            generate("/")

        def visit_Name(self,node):
            generate(node.id)

        def visit_Attribute(self,node):
            # Dotted names such as math.sin.
            self.visit(node.value)
            generate(".")
            generate(node.attr)

        def visit_Call(self,node):
            # Only a plain name can refer to a user-defined function; a
            # dotted callee (ast.Attribute) has no "id".
            name = getattr(node.func,"id",None)
            debug("function",name)
            if name is not None and name in functions:
                debug("defined function")
                # Pass this visitor along: the call's arguments live in the
                # context of whoever is emitting the call.
                func_visit(functions[name],node.args,self)
                return
            debug("not defined function",name)
            if name is not None:
                generate(name)
            else:
                self.visit(node.func)
            generate("(")
            for position,arg in enumerate(node.args):
                if position:
                    generate(",")
                self.visit(arg)
            generate(")")

        def visit_Num(self,node):
            # Numeric literals on Python versions that still produce ast.Num.
            generate(node.n)

        def visit_Constant(self,node):
            # Literals on Python 3.8+, where the parser produces ast.Constant.
            generate(repr(node.value))

        def generic_visit(self, node):
            debug ("\n",type(node).__name__)
            debug (node._fields)
            ast.NodeVisitor.generic_visit(self, node)

    def func_visit(definition,concrete_args,caller):
        class FuncVisitor(EqVisitor):
            """Emit a function body with its parameters replaced by the call's arguments."""

            def visit_arguments(self,node):
                self.arguments={}
                for concrete_arg,formal_arg in zip(concrete_args,node.args):
                    # Python 3 parameters are ast.arg nodes (name in .arg);
                    # Python 2 parameters are ast.Name nodes (name in .id).
                    formal_name = getattr(formal_arg,"arg",None) or formal_arg.id
                    self.arguments[formal_name]=concrete_arg
                debug(self.arguments)

            def visit_Name(self,node):
                debug("visit Name",node.id)
                if node.id in self.arguments:
                    # The argument expression belongs to the caller, so the
                    # caller's visitor emits it. This keeps substitution
                    # correct when user functions call each other.
                    caller.visit(self.arguments[node.id])
                else:
                    generate(node.id)

        funcV=FuncVisitor()
        funcV.visit(ast.parse(definition))

    eqV=EqVisitor()
    eqV.visit(ast.parse(eq,mode="eval"))
    return "".join(pieces)


def debug(*args,**kwargs):
    """Debug hook; uncomment the print call to trace the traversal."""
    #print(*args,**kwargs)
    pass

Usage:

# Each custom function name maps to the source of its one-line definition.
functions = dict(
    f1="def f1(x,y):return x+y**2",
    f2="def f2(x,y):return sin(x+y)",
)
eq = "-(a+b)+f1(f2(+x,y),z)*4/365.12-h"
expanded = transform(eq, functions)
print(expanded)

Result

((-(a+b)+(((sin((+x+y))+(z**2))*4)/365.12))-h)

WARNING

The code works with Python 2.7 and as it is AST dependent is not guaranteed to work with another version of Python. The Python 3 version doesn't work.

parsing and manipulating user's equations python

You can use the ast module of Python 3 to parse the equation as shown in this answer (that one is in Python 2.6, but the ast module is in Python 3 as well). After you've recovered the Node type inside your equation you can use that to correctly evaluate your equation. This documentation can help you in the process. This is an example of how you can visit the tree:

import ast

class v(ast.NodeVisitor):
    """Print the type name of every node, plus the payload of names and constants."""

    def generic_visit(self, node):
        """Print one line for node, then continue with its children."""
        kind = type(node).__name__
        if kind == "Name":
            print(kind, node.id)
        elif kind == "Constant":
            print(kind, node.value)
        else:
            print(kind)
        # Delegate to the stock traversal so every child is visited too.
        ast.NodeVisitor.generic_visit(self, node)

# Set up the printing visitor, then parse the equation and walk its tree.
visitor = v()
eq = 'sin(x)**2'
parsed = ast.parse(eq)
visitor.generic_visit(parsed)

This code will print the identifier of a variable (x, for example) or of a function (the sin function, for example). You can also add these lines to evaluate an expression:

from math import sin
# eval() looks up the names used in the equation (here `sin` and `x`) in the
# current scope, so both must be defined before this line runs.
x = 2
print(eval(eq))

You can achieve a similar result by manipulating the nodes inside the Abstract Syntax Tree using a NodeTransformer as shown in this example.

Parsing an equation with sub-formulas in python

So I'm spitballing a bit, but here goes.

The compiler.parse function returns an instance of compiler.ast.Module which contains an abstract syntax tree. You can traverse this instance using the getChildNodes method. By recursively examining the left and right attributes of the nodes as you traverse the tree you can isolate compiler.ast.Name instances and swap them out for your substitution expressions.

So a worked example might be:

import compiler

def recursive_parse(node,substitutions):
    """Replace variable names in a tree of binary operations, in place.

    For each of node.left and node.right: if the operand has a "name"
    attribute and that name is a key of substitutions, the operand is
    replaced by the mapped node. An operand without a "name" attribute is
    searched recursively. Nodes that have no left/right operands are left
    untouched.

    Parameters:
        node: the tree node to process; modified in place.
        substitutions: dict mapping a variable name to its replacement node.
    """
    for side in ("left","right"):
        operand = getattr(node,side,None)
        if operand is None:
            # Not a binary operation (e.g. a constant or a call): there is
            # nothing to descend into on this side.
            continue

        # test if the operand is a variable name
        if hasattr(operand,"name"):
            if operand.name in substitutions:
                setattr(node,side,substitutions[operand.name])
        else:
            # if not, go deeper
            recursive_parse(operand,substitutions)

def main(input):
    """Parse an assignment, substitute the known sub-formulas and print both trees.

    Uses the `compiler` module, which is only available on Python 2.

    Parameters:
        input: source text of an assignment such as "t = r*p".
    """
    formulas = {
        "r":"sqrt(x**2+y**2)"
    }

    # each of the substitutions needs to be compiled/parsed
    substitutions = {}
    for key,source in formulas.items():

        # this is a quick ugly way of getting the data of interest
        # (the expression sits three levels below the parsed module);
        # really this should be done in a programmatically cleaner manner
        substitutions[key] = compiler.parse(source).getChildNodes()[0].getChildNodes()[0].getChildNodes()[0]

    # compile the input expression.
    expression = compiler.parse(input)

    # single-argument print calls behave the same as a statement and a function
    print("Input:  %s" % (expression,))

    # traverse the selected input, here we only pass the branch of interest
    # (the right-hand side of the assignment).
    # again, as with above, this is done quick and dirty.
    recursive_parse(expression.getChildNodes()[0].getChildNodes()[0].getChildNodes()[1],substitutions)

    print("Substituted:  %s" % (expression,))

# Run the worked example only when executed as a script.
if __name__ == "__main__":
    input = "t = r*p"
    main(input)

I have admittedly only tested this on a handful of use cases, but I think the basis is there for a generic implementation that can handle a wide variety of inputs.

Running this, I get the output:

Input:         Module(None, Stmt([Assign([AssName('t', 'OP_ASSIGN')], Mul((Name('r'), Name('p'))))]))
Substituted: Module(None, Stmt([Assign([AssName('t', 'OP_ASSIGN')], Mul((CallFunc(Name('sqrt'), [Add((Power((Name('x'), Const(2))), Power((Name('y'), Const(2)))))], None, None), Name('p'))))]))

EDIT:

So the compiler module was deprecated in Python 2.6 and removed in Python 3.0, so a better (and cleaner) solution would be to use the ast module:

import ast
from math import sqrt

# same as the previous recursive function, but looks for the 'id' attribute
# instead of 'name'
def recursive_parse(node,substitutions):
    """Replace variable names in a tree of binary operations, in place.

    For each of node.left and node.right: if the operand has an "id"
    attribute and that id is a key of substitutions, the operand is replaced
    by the mapped node. An operand without an "id" attribute is searched
    recursively. Nodes that have no left/right operands (constants, calls,
    ...) are left untouched.

    Parameters:
        node: the AST node to process; modified in place.
        substitutions: dict mapping a variable name to its replacement node.
    """
    for side in ("left","right"):
        operand = getattr(node,side,None)
        if operand is None:
            # Not a binary operation: nothing to descend into on this side.
            continue
        if hasattr(operand,"id"):
            if operand.id in substitutions:
                setattr(node,side,substitutions[operand.id])
        else:
            recursive_parse(operand,substitutions)

def main(input):
    """Substitute sub-formulas into an assignment, run it and print the result.

    The assignment is parsed, every known sub-formula name on its right-hand
    side is replaced by the parsed sub-formula, and the resulting tree is
    compiled and executed with sample values for x, y and p.

    Parameters:
        input: source text of an assignment to t, such as "t = r*p".
    """
    formulas = {
        "r":"sqrt(x**2+y**2)"
    }

    substitutions = {}
    for key,source in formulas.items():
        substitutions[key] = ast.parse(source, mode='eval').body

    # As this is an assignment operation, mode must be set to exec
    module = ast.parse(input, mode='exec')

    # single-argument print calls behave the same as a statement and a function
    print("Input:  %s" % ast.dump(module))

    recursive_parse(module.body[0].value,substitutions)

    print("Substituted:  %s" % ast.dump(module))

    # The spliced-in nodes come from a separate parse; fill in any location
    # information compile() may still need.
    ast.fix_missing_locations(module)

    # give some values for the equation; an explicit namespace is used
    # because exec() cannot create function locals on Python 3
    namespace = {"sqrt": sqrt, "x": 3, "y": 2, "p": 1}
    code = compile(module,filename='<string>',mode='exec')
    exec(code, namespace)

    print(input)
    print("t = %s" % namespace["t"])

# Run the worked example only when executed as a script.
if __name__ == "__main__":
    input = "t = r*p"
    main(input)

This will compile the expression and execute it in the local space. The output should be:

Input:         Module(body=[Assign(targets=[Name(id='t', ctx=Store())], value=BinOp(left=Name(id='r', ctx=Load()), op=Mult(), right=Name(id='p', ctx=Load())))])
Substituted: Module(body=[Assign(targets=[Name(id='t', ctx=Store())], value=BinOp(left=Call(func=Name(id='sqrt', ctx=Load()), args=[BinOp(left=BinOp(left=Name(id='x', ctx=Load()), op=Pow(), right=Num(n=2)), op=Add(), right=BinOp(left=Name(id='y', ctx=Load()), op=Pow(), right=Num(n=2)))], keywords=[], starargs=None, kwargs=None), op=Mult(), right=Name(id='p', ctx=Load())))])
t = r*p
t = 3.60555127546

Safe way to parse user-supplied mathematical formula in Python

Check out Paul McGuire's pyparsing. He has written both the general parser and a grammar for arithmetic expressions:

from __future__ import division
import pyparsing as pyp
import math
import operator

class NumericStringParser(object):
    '''
    Most of this code comes from the fourFn.py pyparsing example
    http://pyparsing.wikispaces.com/file/view/fourFn.py
    http://pyparsing.wikispaces.com/message/view/home/15549426
    __author__='Paul McGuire'

    All I've done is rewrap Paul McGuire's fourFn.py as a class, so I can use it
    more easily in other places.

    Parsing pushes operands and operators onto self.exprStack in postfix
    order; evaluateStack then folds that stack into a number.
    '''
    def pushFirst(self, strg, loc, toks ):
        """Parse action: push the first matched token onto the expression stack."""
        self.exprStack.append( toks[0] )

    def pushUMinus(self, strg, loc, toks ):
        """Parse action: push a 'unary -' marker when the atom starts with '-'."""
        if toks and toks[0] == '-':
            self.exprStack.append( 'unary -' )

    def __init__(self):
        """
        expop   :: '^'
        multop  :: '*' | '/'
        addop   :: '+' | '-'
        integer :: ['+' | '-'] '0'..'9'+
        atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
        factor  :: atom [ expop factor ]*
        term    :: factor [ multop factor ]*
        expr    :: term [ addop term ]*
        """
        point = pyp.Literal( "." )
        e = pyp.CaselessLiteral( "E" )
        fnumber = pyp.Combine( pyp.Word( "+-"+pyp.nums, pyp.nums ) +
                               pyp.Optional( point + pyp.Optional( pyp.Word( pyp.nums ) ) ) +
                               pyp.Optional( e + pyp.Word( "+-"+pyp.nums, pyp.nums ) ) )
        ident = pyp.Word(pyp.alphas, pyp.alphas+pyp.nums+"_$")
        plus = pyp.Literal( "+" )
        minus = pyp.Literal( "-" )
        mult = pyp.Literal( "*" )
        div = pyp.Literal( "/" )
        lpar = pyp.Literal( "(" ).suppress()
        rpar = pyp.Literal( ")" ).suppress()
        addop = plus | minus
        multop = mult | div
        expop = pyp.Literal( "^" )
        pi = pyp.CaselessLiteral( "PI" )
        expr = pyp.Forward()
        atom = ((pyp.Optional(pyp.oneOf("- +")) +
                 (pi|e|fnumber|ident+lpar+expr+rpar).setParseAction(self.pushFirst))
                | pyp.Optional(pyp.oneOf("- +")) + pyp.Group(lpar+expr+rpar)
                ).setParseAction(self.pushUMinus)
        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
        # that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = pyp.Forward()
        factor << atom + pyp.ZeroOrMore( ( expop + factor ).setParseAction(
            self.pushFirst ) )
        term = factor + pyp.ZeroOrMore( ( multop + factor ).setParseAction(
            self.pushFirst ) )
        expr << term + pyp.ZeroOrMore( ( addop + term ).setParseAction( self.pushFirst ) )
        self.bnf = expr
        # map operator symbols to corresponding arithmetic operations
        epsilon = 1e-12
        self.opn = { "+" : operator.add,
                     "-" : operator.sub,
                     "*" : operator.mul,
                     "/" : operator.truediv,
                     "^" : operator.pow }
        self.fn = { "sin" : math.sin,
                    "cos" : math.cos,
                    "tan" : math.tan,
                    "abs" : abs,
                    "trunc" : int,
                    "round" : round,
                    # For Python3 compatibility, cmp replaced by ((a > 0) - (a < 0)). See
                    # https://docs.python.org/3.0/whatsnew/3.0.html#ordering-comparisons
                    "sgn" : lambda a: ((a > 0) - (a < 0)) if abs(a) > epsilon else 0}
        self.exprStack = []

    def evaluateStack(self, s ):
        """Pop and evaluate the expression on top of the postfix stack s.

        s is consumed (mutated) while evaluating. Returns the numeric value.
        """
        op = s.pop()
        if op == 'unary -':
            return -self.evaluateStack( s )
        if op in self.opn:
            # The right operand was pushed last, so it comes off first.
            op2 = self.evaluateStack( s )
            op1 = self.evaluateStack( s )
            return self.opn[op]( op1, op2 )
        elif op == "PI":
            return math.pi # 3.1415926535
        elif op == "E":
            return math.e # 2.718281828
        elif op in self.fn:
            return self.fn[op]( self.evaluateStack( s ) )
        elif op[0].isalpha():
            # NOTE(review): any other identifier silently evaluates to 0.
            return 0
        else:
            return float( op )

    def eval(self, num_string, parseAll = True):
        """Parse num_string and return its numeric value.

        With parseAll true (the default) the whole string must match the
        grammar; parsing fills self.exprStack through the parse actions.
        """
        self.exprStack = []
        self.bnf.parseString(num_string, parseAll)
        # Evaluate a copy so the parsed stack stays available afterwards.
        return self.evaluateStack( self.exprStack[:] )

# One parser instance can evaluate any number of expressions.
nsp = NumericStringParser()

total = nsp.eval('1+2')
print(total)
# 3.0

difference = nsp.eval('2*3-5')
print(difference)
# 1.0

Parsing equations in Python using regex

Try using re.split()
It will split your equation on the basis of operators.
For example:

import re

text = 'a=x[3]+50'
# The capturing group makes re.split keep the operators in the result list.
pattern = r'([\=\+\-\%\*])'
result = re.split(pattern, text)

Output:

['a', '=', 'x[3]', '+', '50']

parsing and evaluating arbitrary equations with python

You can use DataFrame.eval:

# NOTE(review): assumes `df` is an existing pandas DataFrame with numeric
# columns `a` and `b`; it is not defined in this snippet.
equation1 ="a + 2*a/b"
print (df.eval(equation1))
0 2.00
1 5.60
2 8.75
dtype: float64

Math operations from string

Warning: this way is not a safe way, but is very easy to use. Use it wisely.

Use the eval function.

# Single-argument print(...) works on both Python 2 and Python 3.
print(eval('2 + 4'))

Output:

6

You can even use variables or regular python code.

a = 5
# eval() resolves `a` from the current scope.
print(eval('a + 4'))

Output:

9

You also can get return values:

# eval() returns the value of the evaluated expression.
d = eval('4 + 5')
print(d)

Output:

9

Or call functions:

def add(a, b):
    """Return the sum of a and b."""
    return a + b


def subtract(a, b):
    """Return a minus b."""
    return a - b


a = 20
b = 10
# eval() resolves add, subtract, a and b from the current scope.
print(eval('add(a, b)'))
print(eval('subtract(a, b)'))

Output:

30
10

In case you want to write a parser, maybe you can instead build a Python code generator, if that is easier, and use eval to run the code. With eval you can evaluate any Python expression.

Why is eval unsafe?

Since you can put literally anything in the eval, e.g. if the input argument is:

os.system('rm -rf /')

It will remove all files on your system (at least on Linux/Unix).
So only use eval when you trust the input.



Related Topics



Leave a reply



Submit