"vscode:/vscode.git/clone" did not exist on "c4bb5264acacf55f8f1e693ccd29f7a13d3bcba0"
Parser.cpp 16.1 KB
Newer Older
1
2
3
4
5
6
7
8
/* -------------------------------------------------------------------------- *
 *                                   Lepton                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the Lepton expression parser originating from              *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
9
 * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * Permission is hereby granted, free of charge, to any person obtaining a    *
 * copy of this software and associated documentation files (the "Software"), *
 * to deal in the Software without restriction, including without limitation  *
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
 * and/or sell copies of the Software, and to permit persons to whom the      *
 * Software is furnished to do so, subject to the following conditions:       *
 *                                                                            *
 * The above copyright notice and this permission notice shall be included in *
 * all copies or substantial portions of the Software.                        *
 *                                                                            *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

32
33
34
35
36
37
#include "lepton/Parser.h"
#include "lepton/CustomFunction.h"
#include "lepton/Exception.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/Operation.h"
#include "lepton/ParsedExpression.h"
38
#include <cctype>
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#include <iostream>

using namespace Lepton;
using namespace std;

static const string Digits = "0123456789";
static const string Operators = "+-*/^";
static const bool LeftAssociative[] = {true, true, true, true, false};
static const int Precedence[] = {0, 0, 1, 1, 3};
static const Operation::Id OperationId[] = {Operation::ADD, Operation::SUBTRACT, Operation::MULTIPLY, Operation::DIVIDE, Operation::POWER};

class Lepton::ParseToken {
public:
    enum Type {Number, Operator, Variable, Function, LeftParen, RightParen, Comma, Whitespace};

    ParseToken(string text, Type type) : text(text), type(type) {
    }
    const string& getText() const {
        return text;
    }
    Type getType() const {
        return type;
    }
private:
    string text;
    Type type;
};

67
68
69
70
string Parser::trim(const string& expression) {
    // Remove leading and trailing spaces.
    
    int start, end;
71
    for (start = 0; start < (int) expression.size() && isspace(expression[start]); start++)
72
        ;
73
    for (end = (int) expression.size()-1; end > start && isspace(expression[end]); end--)
74
        ;
75
    if (start == end && isspace(expression[end]))
76
77
78
79
80
        return "";
    return expression.substr(start, end-start+1);
}

ParseToken Parser::getNextToken(const string& expression, int start) {
81
82
83
84
85
86
87
88
89
    char c = expression[start];
    if (c == '(')
        return ParseToken("(", ParseToken::LeftParen);
    if (c == ')')
        return ParseToken(")", ParseToken::RightParen);
    if (c == ',')
        return ParseToken(",", ParseToken::Comma);
    if (Operators.find(c) != string::npos)
        return ParseToken(string(1, c), ParseToken::Operator);
90
    if (isspace(c)) {
91
92
        // White space

93
        for (int pos = start+1; pos < (int) expression.size(); pos++) {
94
            if (!isspace(expression[pos]))
95
96
97
98
99
100
101
102
103
104
                return ParseToken(expression.substr(start, pos-start), ParseToken::Whitespace);
        }
        return ParseToken(expression.substr(start, string::npos), ParseToken::Whitespace);
    }
    if (c == '.' || Digits.find(c) != string::npos) {
        // A number

        bool foundDecimal = (c == '.');
        bool foundExp = false;
        int pos;
105
        for (pos = start+1; pos < (int) expression.size(); pos++) {
106
107
108
109
110
111
112
113
114
            c = expression[pos];
            if (Digits.find(c) != string::npos)
                continue;
            if (c == '.' && !foundDecimal) {
                foundDecimal = true;
                continue;
            }
            if ((c == 'e' || c == 'E') && !foundExp) {
                foundExp = true;
115
                if (pos < (int) expression.size()-1 && (expression[pos+1] == '-' || expression[pos+1] == '+'))
116
117
118
119
120
121
122
123
124
125
                    pos++;
                continue;
            }
            break;
        }
        return ParseToken(expression.substr(start, pos-start), ParseToken::Number);
    }

    // A variable, function, or left parenthesis

126
    for (int pos = start; pos < (int) expression.size(); pos++) {
127
128
129
        c = expression[pos];
        if (c == '(')
            return ParseToken(expression.substr(start, pos-start+1), ParseToken::Function);
130
        if (Operators.find(c) != string::npos || c == ',' || c == ')' || isspace(c))
131
132
133
134
135
            return ParseToken(expression.substr(start, pos-start), ParseToken::Variable);
    }
    return ParseToken(expression.substr(start, string::npos), ParseToken::Variable);
}

136
vector<ParseToken> Parser::tokenize(const string& expression) {
137
138
    vector<ParseToken> tokens;
    int pos = 0;
139
    while (pos < (int) expression.size()) {
140
141
142
        ParseToken token = getNextToken(expression, pos);
        if (token.getType() != ParseToken::Whitespace)
            tokens.push_back(token);
143
        pos += (int) token.getText().size();
144
145
146
147
    }
    return tokens;
}

Peter Eastman's avatar
Peter Eastman committed
148
149
150
151
152
ParsedExpression Parser::parse(const string& expression) {
    return parse(expression, map<string, CustomFunction*>());
}

ParsedExpression Parser::parse(const string& expression, const map<string, CustomFunction*>& customFunctions) {
153
154
    try {
        // First split the expression into subexpressions.
155

156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
        string primaryExpression = expression;
        vector<string> subexpressions;
        while (true) {
            string::size_type pos = primaryExpression.find_last_of(';');
            if (pos == string::npos)
                break;
            string sub = trim(primaryExpression.substr(pos+1));
            if (sub.size() > 0)
                subexpressions.push_back(sub);
            primaryExpression = primaryExpression.substr(0, pos);
        }

        // Parse the subexpressions.

        map<string, ExpressionTreeNode> subexpDefs;
        for (int i = 0; i < (int) subexpressions.size(); i++) {
            string::size_type equalsPos = subexpressions[i].find('=');
            if (equalsPos == string::npos)
                throw Exception("subexpression does not specify a name");
            string name = trim(subexpressions[i].substr(0, equalsPos));
            if (name.size() == 0)
                throw Exception("subexpression does not specify a name");
            vector<ParseToken> tokens = tokenize(subexpressions[i].substr(equalsPos+1));
            int pos = 0;
            subexpDefs[name] = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
            if (pos != tokens.size())
                throw Exception("unexpected text at end of subexpression: "+tokens[pos].getText());
        }
184

185
        // Now parse the primary expression.
186

187
        vector<ParseToken> tokens = tokenize(primaryExpression);
188
        int pos = 0;
189
        ExpressionTreeNode result = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
190
        if (pos != tokens.size())
191
192
193
194
195
            throw Exception("unexpected text at end of expression: "+tokens[pos].getText());
        return ParsedExpression(result);
    }
    catch (Exception& ex) {
        throw Exception("Parse error in expression \""+expression+"\": "+ex.what());
196
    }
197
198
}

199
200
ExpressionTreeNode Parser::parsePrecedence(const vector<ParseToken>& tokens, int& pos, const map<string, CustomFunction*>& customFunctions,
            const map<string, ExpressionTreeNode>& subexpressionDefs, int precedence) {
201
    if (pos == tokens.size())
202
        throw Exception("unexpected end of expression");
203
204
205
206
207
208
209
210
211
212
213
214

    // Parse the next value (number, variable, function, parenthesized expression)

    ParseToken token = tokens[pos];
    ExpressionTreeNode result;
    if (token.getType() == ParseToken::Number) {
        double value;
        stringstream(token.getText()) >> value;
        result = ExpressionTreeNode(new Operation::Constant(value));
        pos++;
    }
    else if (token.getType() == ParseToken::Variable) {
215
216
217
218
219
220
221
        map<string, ExpressionTreeNode>::const_iterator subexp = subexpressionDefs.find(token.getText());
        if (subexp == subexpressionDefs.end()) {
            Operation* op = new Operation::Variable(token.getText());
            result = ExpressionTreeNode(op);
        }
        else
            result = subexp->second;
222
223
224
225
        pos++;
    }
    else if (token.getType() == ParseToken::LeftParen) {
        pos++;
226
        result = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0);
227
        if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
228
            throw Exception("unbalanced parentheses");
229
230
231
232
233
234
235
        pos++;
    }
    else if (token.getType() == ParseToken::Function) {
        pos++;
        vector<ExpressionTreeNode> args;
        bool moreArgs;
        do {
236
            args.push_back(parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0));
237
            moreArgs = (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Comma);
238
239
240
241
            if (moreArgs)
                pos++;
        } while (moreArgs);
        if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
242
            throw Exception("unbalanced parentheses");
243
        pos++;
Peter Eastman's avatar
Peter Eastman committed
244
245
246
247
248
249
250
251
        Operation* op = getFunctionOperation(token.getText(), customFunctions);
        try {
            result = ExpressionTreeNode(op, args);
        }
        catch (...) {
            delete op;
            throw;
        }
252
253
254
    }
    else if (token.getType() == ParseToken::Operator && token.getText() == "-") {
        pos++;
255
        ExpressionTreeNode toNegate = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 2);
256
257
258
        result = ExpressionTreeNode(new Operation::Negate(), toNegate);
    }
    else
259
        throw Exception("unexpected token: "+token.getText());
260
261
262

    // Now deal with the next binary operator.

263
    while (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Operator) {
264
        token = tokens[pos];
265
        int opIndex = (int) Operators.find(token.getText());
Peter Eastman's avatar
Peter Eastman committed
266
        int opPrecedence = Precedence[opIndex];
267
268
269
        if (opPrecedence < precedence)
            return result;
        pos++;
Peter Eastman's avatar
Peter Eastman committed
270
271
272
273
274
275
276
277
278
        ExpressionTreeNode arg = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, LeftAssociative[opIndex] ? opPrecedence+1 : opPrecedence);
        Operation* op = getOperatorOperation(token.getText());
        try {
            result = ExpressionTreeNode(op, result, arg);
        }
        catch (...) {
            delete op;
            throw;
        }
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
    }
    return result;
}

Operation* Parser::getOperatorOperation(const std::string& name) {
    switch (OperationId[Operators.find(name)]) {
        case Operation::ADD:
            return new Operation::Add();
        case Operation::SUBTRACT:
            return new Operation::Subtract();
        case Operation::MULTIPLY:
            return new Operation::Multiply();
        case Operation::DIVIDE:
            return new Operation::Divide();
        case Operation::POWER:
            return new Operation::Power();
        default:
296
            throw Exception("unknown operator");
297
298
299
    }
}

Peter Eastman's avatar
Peter Eastman committed
300
Operation* Parser::getFunctionOperation(const std::string& name, const map<string, CustomFunction*>& customFunctions) {
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315

    static map<string, Operation::Id> opMap;
    if (opMap.size() == 0) {
        opMap["sqrt"] = Operation::SQRT;
        opMap["exp"] = Operation::EXP;
        opMap["log"] = Operation::LOG;
        opMap["sin"] = Operation::SIN;
        opMap["cos"] = Operation::COS;
        opMap["sec"] = Operation::SEC;
        opMap["csc"] = Operation::CSC;
        opMap["tan"] = Operation::TAN;
        opMap["cot"] = Operation::COT;
        opMap["asin"] = Operation::ASIN;
        opMap["acos"] = Operation::ACOS;
        opMap["atan"] = Operation::ATAN;
316
        opMap["atan2"] = Operation::ATAN2;
317
318
319
        opMap["sinh"] = Operation::SINH;
        opMap["cosh"] = Operation::COSH;
        opMap["tanh"] = Operation::TANH;
320
321
        opMap["erf"] = Operation::ERF;
        opMap["erfc"] = Operation::ERFC;
322
        opMap["step"] = Operation::STEP;
323
        opMap["delta"] = Operation::DELTA;
324
325
326
        opMap["square"] = Operation::SQUARE;
        opMap["cube"] = Operation::CUBE;
        opMap["recip"] = Operation::RECIPROCAL;
327
328
329
        opMap["min"] = Operation::MIN;
        opMap["max"] = Operation::MAX;
        opMap["abs"] = Operation::ABS;
330
331
        opMap["floor"] = Operation::FLOOR;
        opMap["ceil"] = Operation::CEIL;
332
        opMap["select"] = Operation::SELECT;
333
334
    }
    string trimmed = name.substr(0, name.size()-1);
Peter Eastman's avatar
Peter Eastman committed
335
336
337
338
339
340
341
342
343

    // First check custom functions.

    map<string, CustomFunction*>::const_iterator custom = customFunctions.find(trimmed);
    if (custom != customFunctions.end())
        return new Operation::Custom(trimmed, custom->second->clone());

    // Now try standard functions.

344
345
    map<string, Operation::Id>::const_iterator iter = opMap.find(trimmed);
    if (iter == opMap.end())
346
        throw Exception("unknown function: "+trimmed);
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
    switch (iter->second) {
        case Operation::SQRT:
            return new Operation::Sqrt();
        case Operation::EXP:
            return new Operation::Exp();
        case Operation::LOG:
            return new Operation::Log();
        case Operation::SIN:
            return new Operation::Sin();
        case Operation::COS:
            return new Operation::Cos();
        case Operation::SEC:
            return new Operation::Sec();
        case Operation::CSC:
            return new Operation::Csc();
        case Operation::TAN:
            return new Operation::Tan();
        case Operation::COT:
            return new Operation::Cot();
        case Operation::ASIN:
            return new Operation::Asin();
        case Operation::ACOS:
            return new Operation::Acos();
        case Operation::ATAN:
            return new Operation::Atan();
372
373
        case Operation::ATAN2:
            return new Operation::Atan2();
374
375
376
377
378
379
        case Operation::SINH:
            return new Operation::Sinh();
        case Operation::COSH:
            return new Operation::Cosh();
        case Operation::TANH:
            return new Operation::Tanh();
380
381
382
383
        case Operation::ERF:
            return new Operation::Erf();
        case Operation::ERFC:
            return new Operation::Erfc();
384
385
        case Operation::STEP:
            return new Operation::Step();
386
387
        case Operation::DELTA:
            return new Operation::Delta();
388
389
390
391
392
393
        case Operation::SQUARE:
            return new Operation::Square();
        case Operation::CUBE:
            return new Operation::Cube();
        case Operation::RECIPROCAL:
            return new Operation::Reciprocal();
394
395
396
397
398
399
        case Operation::MIN:
            return new Operation::Min();
        case Operation::MAX:
            return new Operation::Max();
        case Operation::ABS:
            return new Operation::Abs();
400
401
402
403
        case Operation::FLOOR:
            return new Operation::Floor();
        case Operation::CEIL:
            return new Operation::Ceil();
404
405
        case Operation::SELECT:
            return new Operation::Select();
406
        default:
407
            throw Exception("unknown function");
408
409
    }
}