package input

import "github.com/ollama/ollama/ml"

// Input represents one token in the input stream.
type Input struct {
	// Token is a single element of text.
	Token int32

	// Multimodal is opaque data representing a non-text
	// element such as an image (or part of one if the image
	// can be processed in pieces). It may accompany Token
	// or appear on its own.
	Multimodal any

	// MultimodalHash is a unique representation of the data
	// stored in Multimodal, used for caching and comparing
	// equality.
	MultimodalHash uint64

	// SameBatch forces the following number of tokens to be processed
	// in a single batch, breaking and extending batches as needed.
	// Useful for things like images that must be processed in one
	// shot.
	SameBatch int
}

// MultimodalIndex pairs a multimodal element (such as an image)
// with the index, in the slice of Inputs, of the token it
// corresponds to.
type MultimodalIndex struct {
	// Index is an offset into the slice of Inputs. It is not the
	// same as the position; to find that, use Index with the
	// Positions slice.
	Index int

	// Multimodal is the multimodal element itself.
	Multimodal any
}

Jesse Gross's avatar
Jesse Gross committed
38
39
40
// Batch contains the inputs for a model forward pass
type Batch struct {
	// Inputs is the input tokens, including placeholders for multimodal inputs.
41
	Inputs ml.Tensor
Jesse Gross's avatar
Jesse Gross committed
42
43
44
45

	// Multimodal is a set of multimodal embeddings previously created by
	// EncodeMultimodal, along with an index into Inputs. Unused for text-only
	// models or for batches without multimodal elements.
46
	Multimodal []MultimodalIndex
Jesse Gross's avatar
Jesse Gross committed
47
48
49
50
51
52
53
54
55
56
57

	// Positions is the position for each Input, relative to its sequence. Equal
	// in length to Inputs.
	Positions []int32

	// Sequences is the sequence for each Input. Equal in length to Inputs.
	Sequences []int

	// Outputs are the set of indicies into Inputs for which output data should
	// be returned.
	Outputs []int32
58
}