parser.go 6.16 KB
Newer Older
1
package parser
2
3
4

import (
	"bufio"
5
	"bytes"
6
	"errors"
7
	"fmt"
8
	"io"
Michael Yang's avatar
Michael Yang committed
9
10
	"strconv"
	"strings"
Michael Yang's avatar
Michael Yang committed
11
	"unicode"
12
13
)

Michael Yang's avatar
Michael Yang committed
14
15
16
17
18
19
20
21
22
23
24
25
26
// File holds the commands produced by ParseFile.
type File struct {
	// Commands lists the parsed commands in the order ParseFile appended them.
	Commands []Command
}

// String renders every command via Command.String, one per line, with a
// newline after each command (including the last).
func (f File) String() string {
	var out strings.Builder
	for i := range f.Commands {
		out.WriteString(f.Commands[i].String())
		out.WriteByte('\n')
	}

	return out.String()
}

27
28
// Command is a single parsed instruction.
type Command struct {
	// Name is the normalized command name. ParseFile stores "model" for a
	// FROM line, the parameter name for a PARAMETER line, and the lower-cased
	// keyword for every other command.
	Name string

	// Args is the unquoted value of the command. For "message" commands
	// ParseFile stores it as "<role>: <content>".
	Args string
}

Michael Yang's avatar
Michael Yang committed
32
func (c Command) String() string {
Michael Yang's avatar
Michael Yang committed
33
	var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
34
35
	switch c.Name {
	case "model":
Michael Yang's avatar
Michael Yang committed
36
		fmt.Fprintf(&sb, "FROM %s", c.Args)
Michael Yang's avatar
Michael Yang committed
37
	case "license", "template", "system", "adapter":
Michael Yang's avatar
Michael Yang committed
38
		fmt.Fprintf(&sb, "%s %s", strings.ToUpper(c.Name), quote(c.Args))
Michael Yang's avatar
Michael Yang committed
39
40
	case "message":
		role, message, _ := strings.Cut(c.Args, ": ")
Michael Yang's avatar
Michael Yang committed
41
		fmt.Fprintf(&sb, "MESSAGE %s %s", role, quote(message))
Michael Yang's avatar
Michael Yang committed
42
	default:
Michael Yang's avatar
Michael Yang committed
43
		fmt.Fprintf(&sb, "PARAMETER %s %s", c.Name, quote(c.Args))
Michael Yang's avatar
Michael Yang committed
44
45
	}

Michael Yang's avatar
Michael Yang committed
46
	return sb.String()
Michael Yang's avatar
Michael Yang committed
47
48
}

Michael Yang's avatar
Michael Yang committed
49
// state identifies which part of the input the parser is currently reading.
type state int
50

Michael Yang's avatar
Michael Yang committed
51
52
53
54
55
56
57
58
const (
	// stateNil is the zero value: between commands, not inside any token.
	stateNil state = iota
	// stateName is reading a command keyword.
	stateName
	// stateValue is reading a command's value.
	stateValue
	// stateParameter is reading the parameter name that follows a "parameter" keyword.
	stateParameter
	// stateMessage is reading the role that follows a "message" keyword.
	stateMessage
	// stateComment is inside a '#' comment, which runs until the next newline.
	stateComment
)
59

Michael Yang's avatar
tests  
Michael Yang committed
60
// Sentinel errors returned by the parser.
var (
	// errMissingFrom is returned by ParseFile when no "model" (FROM) command was parsed.
	errMissingFrom = errors.New("no FROM line")
	// errInvalidMessageRole is returned when the role of a MESSAGE command is not recognized.
	errInvalidMessageRole = errors.New("message role must be one of \"system\", \"user\", or \"assistant\"")
	// errInvalidCommand is returned when a command keyword is not recognized.
	errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"")
)
Michael Yang's avatar
Michael Yang committed
65

Michael Yang's avatar
Michael Yang committed
66
func ParseFile(r io.Reader) (*File, error) {
Michael Yang's avatar
Michael Yang committed
67
68
69
70
71
	var cmd Command
	var curr state
	var b bytes.Buffer
	var role string

Michael Yang's avatar
Michael Yang committed
72
73
74
75
76
	var lineCount int
	var linePos int

	var utf16 bool

Michael Yang's avatar
Michael Yang committed
77
78
	var f File

Michael Yang's avatar
Michael Yang committed
79
	br := bufio.NewReader(r)
Michael Yang's avatar
Michael Yang committed
80
81
82
83
84
85
86
	for {
		r, _, err := br.ReadRune()
		if errors.Is(err, io.EOF) {
			break
		} else if err != nil {
			return nil, err
		}
87

Michael Yang's avatar
Michael Yang committed
88
89
90
91
92
		// the utf16 byte order mark will be read as "unreadable" by ReadRune()
		if isUnreadable(r) && lineCount == 0 && linePos == 0 {
			utf16 = true
			continue
		}
93

Michael Yang's avatar
Michael Yang committed
94
95
96
		// skip the second byte if we're reading utf16
		if utf16 && r == 0 {
			continue
97
98
		}

Michael Yang's avatar
Michael Yang committed
99
100
101
102
103
		next, r, err := parseRuneForState(r, curr)
		if errors.Is(err, io.ErrUnexpectedEOF) {
			return nil, fmt.Errorf("%w: %s", err, b.String())
		} else if err != nil {
			return nil, err
104
105
		}

Michael Yang's avatar
Michael Yang committed
106
107
108
109
110
111
112
		if isNewline(r) {
			lineCount++
			linePos = 0
		} else {
			linePos++
		}

Michael Yang's avatar
Michael Yang committed
113
		// process the state transition, some transitions need to be intercepted and redirected
Michael Yang's avatar
Michael Yang committed
114
115
		if next != curr {
			switch curr {
Michael Yang's avatar
Michael Yang committed
116
117
118
119
120
			case stateName:
				if !isValidCommand(b.String()) {
					return nil, errInvalidCommand
				}

Michael Yang's avatar
Michael Yang committed
121
				// next state sometimes depends on the current buffer value
Michael Yang's avatar
Michael Yang committed
122
123
124
125
				switch s := strings.ToLower(b.String()); s {
				case "from":
					cmd.Name = "model"
				case "parameter":
Michael Yang's avatar
Michael Yang committed
126
					// transition to stateParameter which sets command name
Michael Yang's avatar
Michael Yang committed
127
128
					next = stateParameter
				case "message":
Michael Yang's avatar
Michael Yang committed
129
					// transition to stateMessage which validates the message role
Michael Yang's avatar
Michael Yang committed
130
131
132
133
134
					next = stateMessage
					fallthrough
				default:
					cmd.Name = s
				}
Michael Yang's avatar
Michael Yang committed
135
136
			case stateParameter:
				cmd.Name = b.String()
Michael Yang's avatar
Michael Yang committed
137
			case stateMessage:
Michael Yang's avatar
Michael Yang committed
138
				if !isValidMessageRole(b.String()) {
Michael Yang's avatar
Michael Yang committed
139
					return nil, errInvalidMessageRole
Michael Yang's avatar
Michael Yang committed
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
				}

				role = b.String()
			case stateComment, stateNil:
				// pass
			case stateValue:
				s, ok := unquote(b.String())
				if !ok || isSpace(r) {
					if _, err := b.WriteRune(r); err != nil {
						return nil, err
					}

					continue
				}

				if role != "" {
					s = role + ": " + s
					role = ""
				}

				cmd.Args = s
Michael Yang's avatar
Michael Yang committed
161
				f.Commands = append(f.Commands, cmd)
Michael Yang's avatar
Michael Yang committed
162
163
			}

Michael Yang's avatar
Michael Yang committed
164
165
166
167
168
169
170
			b.Reset()
			curr = next
		}

		if strconv.IsPrint(r) {
			if _, err := b.WriteRune(r); err != nil {
				return nil, err
Michael Yang's avatar
Michael Yang committed
171
			}
Michael Yang's avatar
Michael Yang committed
172
173
174
175
176
177
178
179
		}
	}

	// flush the buffer
	switch curr {
	case stateComment, stateNil:
		// pass; nothing to flush
	case stateValue:
Michael Yang's avatar
Michael Yang committed
180
181
		s, ok := unquote(b.String())
		if !ok {
Michael Yang's avatar
Michael Yang committed
182
			return nil, io.ErrUnexpectedEOF
183
		}
184

Michael Yang's avatar
Michael Yang committed
185
186
187
188
189
		if role != "" {
			s = role + ": " + s
		}

		cmd.Args = s
Michael Yang's avatar
Michael Yang committed
190
		f.Commands = append(f.Commands, cmd)
Michael Yang's avatar
Michael Yang committed
191
192
	default:
		return nil, io.ErrUnexpectedEOF
193
194
	}

Michael Yang's avatar
Michael Yang committed
195
	for _, cmd := range f.Commands {
Michael Yang's avatar
Michael Yang committed
196
		if cmd.Name == "model" {
Michael Yang's avatar
Michael Yang committed
197
			return &f, nil
Michael Yang's avatar
Michael Yang committed
198
		}
199
200
	}

Michael Yang's avatar
tests  
Michael Yang committed
201
	return nil, errMissingFrom
202
}
203

Michael Yang's avatar
Michael Yang committed
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
func parseRuneForState(r rune, cs state) (state, rune, error) {
	switch cs {
	case stateNil:
		switch {
		case r == '#':
			return stateComment, 0, nil
		case isSpace(r), isNewline(r):
			return stateNil, 0, nil
		default:
			return stateName, r, nil
		}
	case stateName:
		switch {
		case isAlpha(r):
			return stateName, r, nil
		case isSpace(r):
			return stateValue, 0, nil
		default:
Michael Yang's avatar
Michael Yang committed
222
			return stateNil, 0, errInvalidCommand
Michael Yang's avatar
Michael Yang committed
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
		}
	case stateValue:
		switch {
		case isNewline(r):
			return stateNil, r, nil
		case isSpace(r):
			return stateNil, r, nil
		default:
			return stateValue, r, nil
		}
	case stateParameter:
		switch {
		case isAlpha(r), isNumber(r), r == '_':
			return stateParameter, r, nil
		case isSpace(r):
			return stateValue, 0, nil
		default:
			return stateNil, 0, io.ErrUnexpectedEOF
		}
	case stateMessage:
		switch {
		case isAlpha(r):
			return stateMessage, r, nil
		case isSpace(r):
			return stateValue, 0, nil
		default:
			return stateNil, 0, io.ErrUnexpectedEOF
		}
	case stateComment:
		switch {
		case isNewline(r):
			return stateNil, 0, nil
		default:
			return stateComment, 0, nil
		}
	default:
		return stateNil, 0, errors.New("")
260
	}
Michael Yang's avatar
Michael Yang committed
261
}
262

Michael Yang's avatar
Michael Yang committed
263
// quote wraps s in quotes when writing it bare would not read back as the
// same value: when it contains a newline, starts or ends with a space, or
// starts with a double quote (unquote treats a leading '"' as an opening
// delimiter, so a bare value would be stripped or rejected on re-parse).
//
// Values that need quoting and contain a double quote are wrapped in triple
// quotes; other values that need quoting are wrapped in single double quotes.
// All remaining values are returned unchanged.
func quote(s string) string {
	needsQuotes := strings.Contains(s, "\n") ||
		strings.HasPrefix(s, " ") ||
		strings.HasSuffix(s, " ") ||
		strings.HasPrefix(s, `"`)
	if !needsQuotes {
		return s
	}

	if strings.Contains(s, `"`) {
		return `"""` + s + `"""`
	}

	return `"` + s + `"`
}

Michael Yang's avatar
Michael Yang committed
275
276
277
278
279
280
281
282
// unquote strips one layer of surrounding double quotes from s.
//
// A string that starts with `"""` must also end with `"""` (and be at least
// six bytes long); a string that starts with a single `"` must end with `"`
// (and be at least two bytes long). In both cases the inner text is returned
// with true; if the closing delimiter is missing the result is ("", false).
// A string that does not start with a double quote is returned unchanged
// with true.
func unquote(s string) (string, bool) {
	// TODO: single quotes
	const triple = `"""`

	switch {
	case strings.HasPrefix(s, triple):
		// The length check keeps the opening and closing delimiters from overlapping.
		if len(s) >= 2*len(triple) && strings.HasSuffix(s, triple) {
			return s[len(triple) : len(s)-len(triple)], true
		}

		return "", false
	case strings.HasPrefix(s, `"`):
		// A lone `"` is an opening quote without a closing one.
		if len(s) >= 2 && strings.HasSuffix(s, `"`) {
			return s[1 : len(s)-1], true
		}

		return "", false
	default:
		return s, true
	}
}

Michael Yang's avatar
Michael Yang committed
296
297
298
// isAlpha reports whether r is an ASCII letter, upper or lower case.
func isAlpha(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z':
		return true
	case 'A' <= r && r <= 'Z':
		return true
	default:
		return false
	}
}
299

Michael Yang's avatar
Michael Yang committed
300
301
302
// isNumber reports whether r is an ASCII decimal digit.
func isNumber(r rune) bool {
	switch r {
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		return true
	default:
		return false
	}
}
303

Michael Yang's avatar
Michael Yang committed
304
305
306
// isSpace reports whether r is a space or a horizontal tab. Newlines are
// deliberately not included; see isNewline.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	default:
		return false
	}
}
Michael Yang's avatar
Michael Yang committed
307

Michael Yang's avatar
Michael Yang committed
308
309
310
// isNewline reports whether r is a carriage return or a line feed.
func isNewline(r rune) bool {
	switch r {
	case '\r', '\n':
		return true
	default:
		return false
	}
}
311

Michael Yang's avatar
Michael Yang committed
312
313
314
315
// isUnreadable reports whether r is the Unicode replacement character
// (unicode.ReplacementChar).
func isUnreadable(r rune) bool {
	switch r {
	case unicode.ReplacementChar:
		return true
	default:
		return false
	}
}

Michael Yang's avatar
Michael Yang committed
316
// isValidMessageRole reports whether role is exactly "system", "user" or
// "assistant". The comparison is case-sensitive.
func isValidMessageRole(role string) bool {
	switch role {
	case "system", "user", "assistant":
		return true
	default:
		return false
	}
}
Michael Yang's avatar
Michael Yang committed
319
320
321
322
323
324
325
326
327

// isValidCommand reports whether cmd, compared after lower-casing, is one of
// the recognized command keywords.
func isValidCommand(cmd string) bool {
	lowered := strings.ToLower(cmd)
	for _, known := range []string{"from", "license", "template", "system", "adapter", "parameter", "message"} {
		if lowered == known {
			return true
		}
	}

	return false
}