Parser.cpp 15.5 KB
Newer Older
1
2
3
4
5
6
7
8
/* -------------------------------------------------------------------------- *
 *                                   Lepton                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the Lepton expression parser originating from              *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
9
 * Portions copyright (c) 2009-2013 Stanford University and the Authors.      *
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * Permission is hereby granted, free of charge, to any person obtaining a    *
 * copy of this software and associated documentation files (the "Software"), *
 * to deal in the Software without restriction, including without limitation  *
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
 * and/or sell copies of the Software, and to permit persons to whom the      *
 * Software is furnished to do so, subject to the following conditions:       *
 *                                                                            *
 * The above copyright notice and this permission notice shall be included in *
 * all copies or substantial portions of the Software.                        *
 *                                                                            *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

32
33
34
35
36
37
#include "lepton/Parser.h"
#include "lepton/CustomFunction.h"
#include "lepton/Exception.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/Operation.h"
#include "lepton/ParsedExpression.h"
38
#include <cctype>
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#include <iostream>

using namespace Lepton;
using namespace std;

static const string Digits = "0123456789";
static const string Operators = "+-*/^";
static const bool LeftAssociative[] = {true, true, true, true, false};
static const int Precedence[] = {0, 0, 1, 1, 3};
static const Operation::Id OperationId[] = {Operation::ADD, Operation::SUBTRACT, Operation::MULTIPLY, Operation::DIVIDE, Operation::POWER};

class Lepton::ParseToken {
public:
    enum Type {Number, Operator, Variable, Function, LeftParen, RightParen, Comma, Whitespace};

    ParseToken(string text, Type type) : text(text), type(type) {
    }
    const string& getText() const {
        return text;
    }
    Type getType() const {
        return type;
    }
private:
    string text;
    Type type;
};

67
68
69
70
string Parser::trim(const string& expression) {
    // Remove leading and trailing spaces.
    
    int start, end;
71
    for (start = 0; start < (int) expression.size() && isspace(expression[start]); start++)
72
        ;
73
    for (end = expression.size()-1; end > start && isspace(expression[end]); end--)
74
        ;
75
    if (start == end && isspace(expression[end]))
76
77
78
79
80
        return "";
    return expression.substr(start, end-start+1);
}

ParseToken Parser::getNextToken(const string& expression, int start) {
81
82
83
84
85
86
87
88
89
    char c = expression[start];
    if (c == '(')
        return ParseToken("(", ParseToken::LeftParen);
    if (c == ')')
        return ParseToken(")", ParseToken::RightParen);
    if (c == ',')
        return ParseToken(",", ParseToken::Comma);
    if (Operators.find(c) != string::npos)
        return ParseToken(string(1, c), ParseToken::Operator);
90
    if (isspace(c)) {
91
92
        // White space

93
        for (int pos = start+1; pos < (int) expression.size(); pos++) {
94
            if (!isspace(expression[pos]))
95
96
97
98
99
100
101
102
103
104
                return ParseToken(expression.substr(start, pos-start), ParseToken::Whitespace);
        }
        return ParseToken(expression.substr(start, string::npos), ParseToken::Whitespace);
    }
    if (c == '.' || Digits.find(c) != string::npos) {
        // A number

        bool foundDecimal = (c == '.');
        bool foundExp = false;
        int pos;
105
        for (pos = start+1; pos < (int) expression.size(); pos++) {
106
107
108
109
110
111
112
113
114
            c = expression[pos];
            if (Digits.find(c) != string::npos)
                continue;
            if (c == '.' && !foundDecimal) {
                foundDecimal = true;
                continue;
            }
            if ((c == 'e' || c == 'E') && !foundExp) {
                foundExp = true;
115
                if (pos < (int) expression.size()-1 && (expression[pos+1] == '-' || expression[pos+1] == '+'))
116
117
118
119
120
121
122
123
124
125
                    pos++;
                continue;
            }
            break;
        }
        return ParseToken(expression.substr(start, pos-start), ParseToken::Number);
    }

    // A variable, function, or left parenthesis

126
    for (int pos = start; pos < (int) expression.size(); pos++) {
127
128
129
        c = expression[pos];
        if (c == '(')
            return ParseToken(expression.substr(start, pos-start+1), ParseToken::Function);
130
        if (Operators.find(c) != string::npos || c == ',' || c == ')' || isspace(c))
131
132
133
134
135
            return ParseToken(expression.substr(start, pos-start), ParseToken::Variable);
    }
    return ParseToken(expression.substr(start, string::npos), ParseToken::Variable);
}

136
vector<ParseToken> Parser::tokenize(const string& expression) {
137
138
    vector<ParseToken> tokens;
    int pos = 0;
139
    while (pos < (int) expression.size()) {
140
141
142
143
144
145
146
147
        ParseToken token = getNextToken(expression, pos);
        if (token.getType() != ParseToken::Whitespace)
            tokens.push_back(token);
        pos += token.getText().size();
    }
    return tokens;
}

Peter Eastman's avatar
Peter Eastman committed
148
149
150
151
152
ParsedExpression Parser::parse(const string& expression) {
    return parse(expression, map<string, CustomFunction*>());
}

ParsedExpression Parser::parse(const string& expression, const map<string, CustomFunction*>& customFunctions) {
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
    // First split the expression into subexpressions.

    string primaryExpression = expression;
    vector<string> subexpressions;
    while (true) {
        string::size_type pos = primaryExpression.find_last_of(';');
        if (pos == string::npos)
            break;
        string sub = trim(primaryExpression.substr(pos+1));
        if (sub.size() > 0)
            subexpressions.push_back(sub);
        primaryExpression = primaryExpression.substr(0, pos);
    }

    // Parse the subexpressions.

    map<string, ExpressionTreeNode> subexpDefs;
    for (int i = 0; i < (int) subexpressions.size(); i++) {
        string::size_type equalsPos = subexpressions[i].find('=');
        if (equalsPos == string::npos)
            throw Exception("Parse error: subexpression does not specify a name");
        string name = trim(subexpressions[i].substr(0, equalsPos));
        if (name.size() == 0)
            throw Exception("Parse error: subexpression does not specify a name");
        vector<ParseToken> tokens = tokenize(subexpressions[i].substr(equalsPos+1));
        int pos = 0;
        subexpDefs[name] = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
        if (pos != tokens.size())
181
            throw Exception("Parse error: unexpected text at end of subexpression: "+tokens[pos].getText());
182
183
184
185
186
    }

    // Now parse the primary expression.

    vector<ParseToken> tokens = tokenize(primaryExpression);
187
    int pos = 0;
188
    ExpressionTreeNode result = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
189
    if (pos != tokens.size())
190
        throw Exception("Parse error: unexpected text at end of expression: "+tokens[pos].getText());
191
192
193
    return ParsedExpression(result);
}

194
195
ExpressionTreeNode Parser::parsePrecedence(const vector<ParseToken>& tokens, int& pos, const map<string, CustomFunction*>& customFunctions,
            const map<string, ExpressionTreeNode>& subexpressionDefs, int precedence) {
196
197
198
199
200
201
202
203
204
205
206
207
208
209
    if (pos == tokens.size())
        throw Exception("Parse error: unexpected end of expression");

    // Parse the next value (number, variable, function, parenthesized expression)

    ParseToken token = tokens[pos];
    ExpressionTreeNode result;
    if (token.getType() == ParseToken::Number) {
        double value;
        stringstream(token.getText()) >> value;
        result = ExpressionTreeNode(new Operation::Constant(value));
        pos++;
    }
    else if (token.getType() == ParseToken::Variable) {
210
211
212
213
214
215
216
        map<string, ExpressionTreeNode>::const_iterator subexp = subexpressionDefs.find(token.getText());
        if (subexp == subexpressionDefs.end()) {
            Operation* op = new Operation::Variable(token.getText());
            result = ExpressionTreeNode(op);
        }
        else
            result = subexp->second;
217
218
219
220
        pos++;
    }
    else if (token.getType() == ParseToken::LeftParen) {
        pos++;
221
        result = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0);
222
        if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
Peter Eastman's avatar
Peter Eastman committed
223
            throw Exception("Parse error: unbalanced parentheses");
224
225
226
227
228
229
230
        pos++;
    }
    else if (token.getType() == ParseToken::Function) {
        pos++;
        vector<ExpressionTreeNode> args;
        bool moreArgs;
        do {
231
            args.push_back(parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0));
232
            moreArgs = (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Comma);
233
234
235
236
            if (moreArgs)
                pos++;
        } while (moreArgs);
        if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
Peter Eastman's avatar
Peter Eastman committed
237
            throw Exception("Parse error: unbalanced parentheses");
238
        pos++;
Peter Eastman's avatar
Peter Eastman committed
239
240
241
242
243
244
245
246
        Operation* op = getFunctionOperation(token.getText(), customFunctions);
        try {
            result = ExpressionTreeNode(op, args);
        }
        catch (...) {
            delete op;
            throw;
        }
247
248
249
    }
    else if (token.getType() == ParseToken::Operator && token.getText() == "-") {
        pos++;
250
        ExpressionTreeNode toNegate = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 2);
251
252
253
        result = ExpressionTreeNode(new Operation::Negate(), toNegate);
    }
    else
254
        throw Exception("Parse error: unexpected token: "+token.getText());
255
256
257

    // Now deal with the next binary operator.

258
    while (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Operator) {
259
        token = tokens[pos];
Peter Eastman's avatar
Peter Eastman committed
260
261
        int opIndex = Operators.find(token.getText());
        int opPrecedence = Precedence[opIndex];
262
263
264
        if (opPrecedence < precedence)
            return result;
        pos++;
Peter Eastman's avatar
Peter Eastman committed
265
266
267
268
269
270
271
272
273
        ExpressionTreeNode arg = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, LeftAssociative[opIndex] ? opPrecedence+1 : opPrecedence);
        Operation* op = getOperatorOperation(token.getText());
        try {
            result = ExpressionTreeNode(op, result, arg);
        }
        catch (...) {
            delete op;
            throw;
        }
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
    }
    return result;
}

Operation* Parser::getOperatorOperation(const std::string& name) {
    switch (OperationId[Operators.find(name)]) {
        case Operation::ADD:
            return new Operation::Add();
        case Operation::SUBTRACT:
            return new Operation::Subtract();
        case Operation::MULTIPLY:
            return new Operation::Multiply();
        case Operation::DIVIDE:
            return new Operation::Divide();
        case Operation::POWER:
            return new Operation::Power();
        default:
            throw Exception("Parse error: unknown operator");
    }
}

Peter Eastman's avatar
Peter Eastman committed
295
Operation* Parser::getFunctionOperation(const std::string& name, const map<string, CustomFunction*>& customFunctions) {
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310

    static map<string, Operation::Id> opMap;
    if (opMap.size() == 0) {
        opMap["sqrt"] = Operation::SQRT;
        opMap["exp"] = Operation::EXP;
        opMap["log"] = Operation::LOG;
        opMap["sin"] = Operation::SIN;
        opMap["cos"] = Operation::COS;
        opMap["sec"] = Operation::SEC;
        opMap["csc"] = Operation::CSC;
        opMap["tan"] = Operation::TAN;
        opMap["cot"] = Operation::COT;
        opMap["asin"] = Operation::ASIN;
        opMap["acos"] = Operation::ACOS;
        opMap["atan"] = Operation::ATAN;
311
312
313
        opMap["sinh"] = Operation::SINH;
        opMap["cosh"] = Operation::COSH;
        opMap["tanh"] = Operation::TANH;
314
315
        opMap["erf"] = Operation::ERF;
        opMap["erfc"] = Operation::ERFC;
316
        opMap["step"] = Operation::STEP;
317
        opMap["delta"] = Operation::DELTA;
318
319
320
        opMap["square"] = Operation::SQUARE;
        opMap["cube"] = Operation::CUBE;
        opMap["recip"] = Operation::RECIPROCAL;
321
322
323
        opMap["min"] = Operation::MIN;
        opMap["max"] = Operation::MAX;
        opMap["abs"] = Operation::ABS;
324
325
    }
    string trimmed = name.substr(0, name.size()-1);
Peter Eastman's avatar
Peter Eastman committed
326
327
328
329
330
331
332
333
334

    // First check custom functions.

    map<string, CustomFunction*>::const_iterator custom = customFunctions.find(trimmed);
    if (custom != customFunctions.end())
        return new Operation::Custom(trimmed, custom->second->clone());

    // Now try standard functions.

335
336
    map<string, Operation::Id>::const_iterator iter = opMap.find(trimmed);
    if (iter == opMap.end())
337
        throw Exception("Parse error: unknown function: "+trimmed);
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
    switch (iter->second) {
        case Operation::SQRT:
            return new Operation::Sqrt();
        case Operation::EXP:
            return new Operation::Exp();
        case Operation::LOG:
            return new Operation::Log();
        case Operation::SIN:
            return new Operation::Sin();
        case Operation::COS:
            return new Operation::Cos();
        case Operation::SEC:
            return new Operation::Sec();
        case Operation::CSC:
            return new Operation::Csc();
        case Operation::TAN:
            return new Operation::Tan();
        case Operation::COT:
            return new Operation::Cot();
        case Operation::ASIN:
            return new Operation::Asin();
        case Operation::ACOS:
            return new Operation::Acos();
        case Operation::ATAN:
            return new Operation::Atan();
363
364
365
366
367
368
        case Operation::SINH:
            return new Operation::Sinh();
        case Operation::COSH:
            return new Operation::Cosh();
        case Operation::TANH:
            return new Operation::Tanh();
369
370
371
372
        case Operation::ERF:
            return new Operation::Erf();
        case Operation::ERFC:
            return new Operation::Erfc();
373
374
        case Operation::STEP:
            return new Operation::Step();
375
376
        case Operation::DELTA:
            return new Operation::Delta();
377
378
379
380
381
382
        case Operation::SQUARE:
            return new Operation::Square();
        case Operation::CUBE:
            return new Operation::Cube();
        case Operation::RECIPROCAL:
            return new Operation::Reciprocal();
383
384
385
386
387
388
        case Operation::MIN:
            return new Operation::Min();
        case Operation::MAX:
            return new Operation::Max();
        case Operation::ABS:
            return new Operation::Abs();
389
390
391
392
        default:
            throw Exception("Parse error: unknown function");
    }
}