convert_to_json.cpp 2.53 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#include <algorithm>
#include <cctype>
#include <functional>
#include <sstream>
#include <string>
#include <vector>
#include <migraphx/convert_to_json.hpp>
#include <migraphx/errors.hpp>
#include <migraphx/ranges.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

using token = std::pair<const char*, const char*>;
using lexer = std::function<const char*(const char* start, const char* end)>;

// Builds a lexer that consumes the longest prefix of [start, end)
// whose characters all satisfy the predicate `p`. Returns a pointer
// to the first character that fails `p` (or `end` if none do).
template <class P>
auto lex_while(P p)
{
    return [=](const char* start, const char* end) {
        const char* pos = start;
        while(pos != end and p(*pos))
            ++pos;
        return pos;
    };
}

// Builds a lexer that consumes exactly one character when it satisfies
// the predicate `p`, and nothing otherwise. Returns start + 1 on a
// match, or `start` when there is no match (including an empty range).
template <class P>
auto lex_if(P p)
{
    return [=](const char* start, const char* end) {
        // Guard against reading past the end of the input: the original
        // dereferenced *start unconditionally, which is only safe when
        // the caller guarantees a non-empty range.
        if(start != end and p(*start))
            return start + 1;
        return start;
    };
}

// Splits [start, end) into tokens by repeatedly trying each lexer in
// order and taking the first one that consumes at least one character.
// Throws if no lexer makes progress (otherwise the loop would spin).
std::vector<token> tokenize(const char* start, const char* end, const std::vector<lexer>& lexers)
{
    std::vector<token> result;
    while(start != end)
    {
        const char* next = start;
        for(const auto& lex : lexers)
        {
            next = lex(start, end);
            if(next != start)
                break;
        }

        if(next == start)
        {
            MIGRAPHX_THROW("TOKENIZE: no token found!");
        }

        result.emplace_back(start, next);
        start = next;
    }

    return result;
}

// Tokenizes a JSON-like string into quoted strings, whitespace runs,
// punctuation, and identifier/number tokens.
std::vector<token> json_tokenize(const std::string& s)
{
    std::vector<lexer> lexers;

    // Quoted string: consumes the opening quote, the (possibly escaped)
    // contents, and the closing quote if present.
    lexers.push_back([](const char* start, const char* end) {
        if(*start != '\"')
            return start;
        ++start;
        while((start != end) and (*start != '\"'))
        {
            if(*start == '\\')
            {
                // Skip the escaped character, but never step past `end`
                // when the input ends with a lone backslash.
                ++start;
                if(start == end)
                    break;
            }
            ++start;
        }

        // Consume the closing quote only if we actually found one; an
        // unterminated string must not advance past the end of input.
        if(start != end)
            ++start;
        return start;
    });

    // Whitespace run. <cctype> functions require a value representable
    // as unsigned char, so cast to avoid UB on negative (non-ASCII) chars.
    lexers.push_back(
        lex_while([](char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; }));

    // Punctuation (single character)
    lexers.push_back(
        lex_if([](char c) { return std::ispunct(static_cast<unsigned char>(c)) != 0; }));

    // Identifier/number
    lexers.push_back(lex_while([](char c) {
        return (std::isalnum(static_cast<unsigned char>(c)) != 0 or contains({'_', '.', '+'}, c));
    }));

    return tokenize(s.data(), s.data() + s.length(), lexers);
}

// Converts a relaxed JSON-like string into valid JSON by wrapping bare
// identifiers in double quotes. Tokens that are already quoted, numeric,
// punctuation, whitespace, or known JSON/number literals pass through
// unchanged.
std::string convert_to_json(const std::string& str)
{
    auto tokens = json_tokenize(str);
    std::stringstream ss;

    for(const auto& token : tokens)
    {
        std::string s(token.first, token.second);
        // Cast to unsigned char: std::isalpha on a negative char value
        // (e.g. a UTF-8 continuation byte) is undefined behavior.
        if(std::isalpha(static_cast<unsigned char>(s.front())) != 0 and
           not contains({"null", "nan", "true", "false", "inf"}, s))
        {
            ss << "\"" << s << "\"";
        }
        else
        {
            ss << s;
        }
    }

    return ss.str();
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx