parser.go 5.89 KB
Newer Older
1
package parser
2
3
4

import (
	"bufio"
5
	"bytes"
6
	"errors"
7
	"fmt"
8
	"io"
Michael Yang's avatar
Michael Yang committed
9
10
	"strconv"
	"strings"
Michael Yang's avatar
Michael Yang committed
11
12
13

	"golang.org/x/text/encoding/unicode"
	"golang.org/x/text/transform"
14
15
)

Michael Yang's avatar
Michael Yang committed
16
17
18
19
20
21
22
23
24
25
26
27
28
// File is the parsed form of an input document: the commands it contained,
// in the order ParseFile appended them.
type File struct {
	// Commands holds one entry per parsed command.
	Commands []Command
}

// String renders the file as text: each command's String form followed by a
// newline, in slice order. A file without commands renders as the empty
// string.
func (f File) String() string {
	var out strings.Builder
	for i := range f.Commands {
		out.WriteString(f.Commands[i].String())
		out.WriteByte('\n')
	}

	return out.String()
}

29
30
// Command is a single parsed instruction.
//
// Name identifies the instruction: ParseFile stores "model" for a FROM line,
// the lower-cased keyword for the other fixed commands, and the parameter's
// own name for a PARAMETER line. Args carries the instruction's unquoted
// value; for MESSAGE commands ParseFile stores it as "<role>: <text>".
type Command struct {
	Name string
	Args string
}

Michael Yang's avatar
Michael Yang committed
34
func (c Command) String() string {
Michael Yang's avatar
Michael Yang committed
35
	var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
36
37
	switch c.Name {
	case "model":
Michael Yang's avatar
Michael Yang committed
38
		fmt.Fprintf(&sb, "FROM %s", c.Args)
Michael Yang's avatar
Michael Yang committed
39
	case "license", "template", "system", "adapter":
Michael Yang's avatar
Michael Yang committed
40
		fmt.Fprintf(&sb, "%s %s", strings.ToUpper(c.Name), quote(c.Args))
Michael Yang's avatar
Michael Yang committed
41
42
	case "message":
		role, message, _ := strings.Cut(c.Args, ": ")
Michael Yang's avatar
Michael Yang committed
43
		fmt.Fprintf(&sb, "MESSAGE %s %s", role, quote(message))
Michael Yang's avatar
Michael Yang committed
44
	default:
Michael Yang's avatar
Michael Yang committed
45
		fmt.Fprintf(&sb, "PARAMETER %s %s", c.Name, quote(c.Args))
Michael Yang's avatar
Michael Yang committed
46
47
	}

Michael Yang's avatar
Michael Yang committed
48
	return sb.String()
Michael Yang's avatar
Michael Yang committed
49
50
}

Michael Yang's avatar
Michael Yang committed
51
// state identifies which part of the input the parser is currently reading.
type state int
52

Michael Yang's avatar
Michael Yang committed
53
54
55
56
57
58
59
60
const (
	// stateNil is the state between commands: spaces, tabs and newlines are
	// skipped, '#' starts a comment, and any other rune starts a command name.
	stateNil state = iota
	// stateName is active while the command keyword is being read.
	stateName
	// stateValue is active while a command's value is being read.
	stateValue
	// stateParameter is active while the name following a PARAMETER keyword
	// is being read.
	stateParameter
	// stateMessage is active while the role following a MESSAGE keyword is
	// being read.
	stateMessage
	// stateComment is active from a '#' until the next newline.
	stateComment
)
61

Michael Yang's avatar
tests  
Michael Yang committed
62
// Sentinel errors returned by ParseFile.
var (
	// errMissingFrom is returned when the parsed input contains no FROM
	// command.
	errMissingFrom = errors.New("no FROM line")

	// errInvalidMessageRole is returned when the role of a MESSAGE command is
	// rejected by isValidMessageRole.
	errInvalidMessageRole = errors.New("message role must be one of \"system\", \"user\", or \"assistant\"")

	// errInvalidCommand is returned when a command keyword contains a rune
	// that is not a letter or is rejected by isValidCommand.
	errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"")
)
Michael Yang's avatar
Michael Yang committed
67

Michael Yang's avatar
Michael Yang committed
68
func ParseFile(r io.Reader) (*File, error) {
Michael Yang's avatar
Michael Yang committed
69
70
71
72
73
	var cmd Command
	var curr state
	var b bytes.Buffer
	var role string

Michael Yang's avatar
Michael Yang committed
74
75
	var f File

Michael Yang's avatar
Michael Yang committed
76
77
78
	tr := unicode.BOMOverride(unicode.UTF8.NewDecoder())
	br := bufio.NewReader(transform.NewReader(r, tr))

Michael Yang's avatar
Michael Yang committed
79
80
81
82
83
84
85
	for {
		r, _, err := br.ReadRune()
		if errors.Is(err, io.EOF) {
			break
		} else if err != nil {
			return nil, err
		}
86

Michael Yang's avatar
Michael Yang committed
87
88
89
90
91
		next, r, err := parseRuneForState(r, curr)
		if errors.Is(err, io.ErrUnexpectedEOF) {
			return nil, fmt.Errorf("%w: %s", err, b.String())
		} else if err != nil {
			return nil, err
92
93
		}

Michael Yang's avatar
Michael Yang committed
94
		// process the state transition, some transitions need to be intercepted and redirected
Michael Yang's avatar
Michael Yang committed
95
96
		if next != curr {
			switch curr {
Michael Yang's avatar
Michael Yang committed
97
98
99
100
101
			case stateName:
				if !isValidCommand(b.String()) {
					return nil, errInvalidCommand
				}

Michael Yang's avatar
Michael Yang committed
102
				// next state sometimes depends on the current buffer value
Michael Yang's avatar
Michael Yang committed
103
104
105
106
				switch s := strings.ToLower(b.String()); s {
				case "from":
					cmd.Name = "model"
				case "parameter":
Michael Yang's avatar
Michael Yang committed
107
					// transition to stateParameter which sets command name
Michael Yang's avatar
Michael Yang committed
108
109
					next = stateParameter
				case "message":
Michael Yang's avatar
Michael Yang committed
110
					// transition to stateMessage which validates the message role
Michael Yang's avatar
Michael Yang committed
111
112
113
114
115
					next = stateMessage
					fallthrough
				default:
					cmd.Name = s
				}
Michael Yang's avatar
Michael Yang committed
116
117
			case stateParameter:
				cmd.Name = b.String()
Michael Yang's avatar
Michael Yang committed
118
			case stateMessage:
Michael Yang's avatar
Michael Yang committed
119
				if !isValidMessageRole(b.String()) {
Michael Yang's avatar
Michael Yang committed
120
					return nil, errInvalidMessageRole
Michael Yang's avatar
Michael Yang committed
121
122
123
124
125
126
				}

				role = b.String()
			case stateComment, stateNil:
				// pass
			case stateValue:
Josh Yan's avatar
Josh Yan committed
127
				s, ok := unquote(strings.TrimSpace(b.String()))
Michael Yang's avatar
Michael Yang committed
128
129
130
131
132
133
134
135
136
137
138
139
140
141
				if !ok || isSpace(r) {
					if _, err := b.WriteRune(r); err != nil {
						return nil, err
					}

					continue
				}

				if role != "" {
					s = role + ": " + s
					role = ""
				}

				cmd.Args = s
Michael Yang's avatar
Michael Yang committed
142
				f.Commands = append(f.Commands, cmd)
Michael Yang's avatar
Michael Yang committed
143
144
			}

Michael Yang's avatar
Michael Yang committed
145
146
147
148
149
150
151
			b.Reset()
			curr = next
		}

		if strconv.IsPrint(r) {
			if _, err := b.WriteRune(r); err != nil {
				return nil, err
Michael Yang's avatar
Michael Yang committed
152
			}
Michael Yang's avatar
Michael Yang committed
153
154
155
156
157
158
159
160
		}
	}

	// flush the buffer
	switch curr {
	case stateComment, stateNil:
		// pass; nothing to flush
	case stateValue:
Josh Yan's avatar
Josh Yan committed
161
		s, ok := unquote(strings.TrimSpace(b.String()))
Michael Yang's avatar
Michael Yang committed
162
		if !ok {
Michael Yang's avatar
Michael Yang committed
163
			return nil, io.ErrUnexpectedEOF
164
		}
165

Michael Yang's avatar
Michael Yang committed
166
167
168
169
170
		if role != "" {
			s = role + ": " + s
		}

		cmd.Args = s
Michael Yang's avatar
Michael Yang committed
171
		f.Commands = append(f.Commands, cmd)
Michael Yang's avatar
Michael Yang committed
172
173
	default:
		return nil, io.ErrUnexpectedEOF
174
175
	}

Michael Yang's avatar
Michael Yang committed
176
	for _, cmd := range f.Commands {
Michael Yang's avatar
Michael Yang committed
177
		if cmd.Name == "model" {
Michael Yang's avatar
Michael Yang committed
178
			return &f, nil
Michael Yang's avatar
Michael Yang committed
179
		}
180
181
	}

Michael Yang's avatar
tests  
Michael Yang committed
182
	return nil, errMissingFrom
183
}
184

Michael Yang's avatar
Michael Yang committed
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
func parseRuneForState(r rune, cs state) (state, rune, error) {
	switch cs {
	case stateNil:
		switch {
		case r == '#':
			return stateComment, 0, nil
		case isSpace(r), isNewline(r):
			return stateNil, 0, nil
		default:
			return stateName, r, nil
		}
	case stateName:
		switch {
		case isAlpha(r):
			return stateName, r, nil
		case isSpace(r):
			return stateValue, 0, nil
		default:
Michael Yang's avatar
Michael Yang committed
203
			return stateNil, 0, errInvalidCommand
Michael Yang's avatar
Michael Yang committed
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
		}
	case stateValue:
		switch {
		case isNewline(r):
			return stateNil, r, nil
		case isSpace(r):
			return stateNil, r, nil
		default:
			return stateValue, r, nil
		}
	case stateParameter:
		switch {
		case isAlpha(r), isNumber(r), r == '_':
			return stateParameter, r, nil
		case isSpace(r):
			return stateValue, 0, nil
		default:
			return stateNil, 0, io.ErrUnexpectedEOF
		}
	case stateMessage:
		switch {
		case isAlpha(r):
			return stateMessage, r, nil
		case isSpace(r):
			return stateValue, 0, nil
		default:
			return stateNil, 0, io.ErrUnexpectedEOF
		}
	case stateComment:
		switch {
		case isNewline(r):
			return stateNil, 0, nil
		default:
			return stateComment, 0, nil
		}
	default:
		return stateNil, 0, errors.New("")
241
	}
Michael Yang's avatar
Michael Yang committed
242
}
243

Michael Yang's avatar
Michael Yang committed
244
// quote returns s in a form that unquote maps back to s, adding quotes only
// when a bare value would be read back differently.
//
// Quotes are needed when s contains a carriage return or line feed, when it
// has leading or trailing whitespace that strings.TrimSpace would strip, or
// when it begins with a double quote and would therefore be mistaken for an
// already-quoted value. Quoted values use triple quotes when s itself
// contains a double quote and single double quotes otherwise. A value that
// contains `"""` cannot be represented unambiguously and is wrapped in triple
// quotes regardless.
func quote(s string) string {
	needsQuotes := strings.ContainsAny(s, "\r\n") ||
		s != strings.TrimSpace(s) ||
		strings.HasPrefix(s, `"`)
	if !needsQuotes {
		return s
	}

	if strings.Contains(s, `"`) {
		return `"""` + s + `"""`
	}

	return `"` + s + `"`
}

Michael Yang's avatar
Michael Yang committed
256
257
258
259
260
261
262
263
// unquote strips one layer of surrounding double quotes from s.
//
// A value opening with `"""` must also close with `"""` (at least six bytes
// in total); a value opening with a single `"` must close with `"` (at least
// two bytes in total). In both cases the inner text is returned with true.
// When the opening quote has no matching close, unquote returns "" and false.
// A value that does not start with a double quote is returned unchanged with
// true.
func unquote(s string) (string, bool) {
	// TODO: single quotes
	const triple = `"""`

	// Triple quotes are checked first because such a value also starts with
	// a single double quote.
	if strings.HasPrefix(s, triple) {
		if len(s) >= 2*len(triple) && strings.HasSuffix(s, triple) {
			return s[len(triple) : len(s)-len(triple)], true
		}

		return "", false
	}

	if strings.HasPrefix(s, `"`) {
		if len(s) >= 2 && strings.HasSuffix(s, `"`) {
			return s[1 : len(s)-1], true
		}

		return "", false
	}

	return s, true
}

Michael Yang's avatar
Michael Yang committed
277
278
279
// isAlpha reports whether r is an ASCII letter, upper or lower case.
func isAlpha(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z':
		return true
	case 'A' <= r && r <= 'Z':
		return true
	}

	return false
}
280

Michael Yang's avatar
Michael Yang committed
281
282
283
// isNumber reports whether r is an ASCII decimal digit.
func isNumber(r rune) bool {
	if r < '0' {
		return false
	}

	return r <= '9'
}
284

Michael Yang's avatar
Michael Yang committed
285
286
287
// isSpace reports whether r is a space or a horizontal tab. Newline
// characters are not considered spaces.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	default:
		return false
	}
}
Michael Yang's avatar
Michael Yang committed
288

Michael Yang's avatar
Michael Yang committed
289
290
291
// isNewline reports whether r is a carriage return or a line feed.
func isNewline(r rune) bool {
	switch r {
	case '\r', '\n':
		return true
	default:
		return false
	}
}
292

Michael Yang's avatar
Michael Yang committed
293
// isValidMessageRole reports whether role is exactly "system", "user" or
// "assistant". The comparison is case-sensitive.
func isValidMessageRole(role string) bool {
	switch role {
	case "system", "user", "assistant":
		return true
	default:
		return false
	}
}
Michael Yang's avatar
Michael Yang committed
296
297
298
299
300
301
302
303
304

// isValidCommand reports whether cmd, after lower-casing, is one of the
// recognised command keywords: from, license, template, system, adapter,
// parameter or message.
func isValidCommand(cmd string) bool {
	keywords := [...]string{
		"from",
		"license",
		"template",
		"system",
		"adapter",
		"parameter",
		"message",
	}

	lowered := strings.ToLower(cmd)
	for _, kw := range keywords {
		if lowered == kw {
			return true
		}
	}

	return false
}