wangkx1 / ollama_dcu · Commits · 22cb4ffc

Commit 22cb4ffc, authored Aug 13, 2024 by wangkx1

remove v0.3.5

Parent: cb75098c
Pipeline #1532: canceled with stages
Changes: 335 · Pipelines: 1
Showing 20 changed files with 0 additions and 1347 deletions:
ollama/llm/ggml_test.go  +0 -1
ollama/llm/gguf.go  +0 -663
ollama/llm/llama.cpp/.clang-tidy  +0 -24
ollama/llm/llama.cpp/.devops/cloud-v-pipeline  +0 -22
ollama/llm/llama.cpp/.devops/full-cuda.Dockerfile  +0 -36
ollama/llm/llama.cpp/.devops/full-rocm.Dockerfile  +0 -50
ollama/llm/llama.cpp/.devops/full.Dockerfile  +0 -25
ollama/llm/llama.cpp/.devops/llama-cli-cuda.Dockerfile  +0 -35
ollama/llm/llama.cpp/.devops/llama-cli-intel.Dockerfile  +0 -28
ollama/llm/llama.cpp/.devops/llama-cli-rocm.Dockerfile  +0 -45
ollama/llm/llama.cpp/.devops/llama-cli-vulkan.Dockerfile  +0 -27
ollama/llm/llama.cpp/.devops/llama-cli.Dockerfile  +0 -23
ollama/llm/llama.cpp/.devops/llama-cpp-cuda.srpm.spec  +0 -83
ollama/llm/llama.cpp/.devops/llama-cpp.srpm.spec  +0 -85
ollama/llm/llama.cpp/.devops/llama-server-cuda.Dockerfile  +0 -39
ollama/llm/llama.cpp/.devops/llama-server-intel.Dockerfile  +0 -32
ollama/llm/llama.cpp/.devops/llama-server-rocm.Dockerfile  +0 -52
ollama/llm/llama.cpp/.devops/llama-server-vulkan.Dockerfile  +0 -29
ollama/llm/llama.cpp/.devops/llama-server.Dockerfile  +0 -27
ollama/llm/llama.cpp/.devops/nix/apps.nix  +0 -21
Too many changes to show: to preserve performance, only 335 of 335+ files are displayed.
ollama/llm/ggml_test.go (deleted, 100755 → 0)

```go
package llm
```
ollama/llm/gguf.go (deleted, 100755 → 0)

```go
package llm

import (
	"bytes"
	"cmp"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"slices"
	"strings"

	"golang.org/x/exp/maps"
)

type containerGGUF struct {
	ByteOrder binary.ByteOrder

	Version uint32

	V1 struct {
		NumTensor uint32
		NumKV     uint32
	}

	V2 struct {
		NumTensor uint64
		NumKV     uint64
	}

	V3 struct {
		NumTensor uint64
		NumKV     uint64
	}

	maxArraySize int
}

func (c *containerGGUF) canCollectArray(size int) bool {
	return c.maxArraySize < 0 || size <= c.maxArraySize
}

func (c *containerGGUF) Name() string { return "gguf" }

func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
	if err := binary.Read(rs, c.ByteOrder, &c.Version); err != nil {
		return nil, err
	}

	var err error
	switch c.Version {
	case 1:
		err = binary.Read(rs, c.ByteOrder, &c.V1)
	case 2:
		err = binary.Read(rs, c.ByteOrder, &c.V2)
	default:
		err = binary.Read(rs, c.ByteOrder, &c.V3)
	}
	if err != nil {
		return nil, err
	}

	model := newGGUF(c)
	if err := model.Decode(rs); err != nil {
		return nil, err
	}

	return model, nil
}

const (
	ggufTypeUint8 uint32 = iota
	ggufTypeInt8
	ggufTypeUint16
	ggufTypeInt16
	ggufTypeUint32
	ggufTypeInt32
	ggufTypeFloat32
	ggufTypeBool
	ggufTypeString
	ggufTypeArray
	ggufTypeUint64
	ggufTypeInt64
	ggufTypeFloat64
)

type gguf struct {
	*containerGGUF

	kv      KV
	tensors []*Tensor

	parameters   uint64
	tensorOffset uint64

	scratch [16 << 10]byte
}

func newGGUF(container *containerGGUF) *gguf {
	return &gguf{
		containerGGUF: container,
		kv:            make(KV),
	}
}

func (llm *gguf) KV() KV {
	return llm.kv
}

func (llm *gguf) Tensors() Tensors {
	return Tensors{
		Items:  llm.tensors,
		Offset: llm.tensorOffset,
	}
}

func (llm *gguf) numTensor() uint64 {
	switch llm.Version {
	case 1:
		return uint64(llm.V1.NumTensor)
	case 2:
		return llm.V2.NumTensor
	default:
		return llm.V3.NumTensor
	}
}

func (llm *gguf) numKV() uint64 {
	switch llm.Version {
	case 1:
		return uint64(llm.V1.NumKV)
	case 2:
		return llm.V2.NumKV
	default:
		return llm.V3.NumKV
	}
}

func (llm *gguf) Decode(rs io.ReadSeeker) error {
	// decode key-values
	for i := 0; uint64(i) < llm.numKV(); i++ {
		k, err := readGGUFString(llm, rs)
		if err != nil {
			return err
		}

		t, err := readGGUF[uint32](llm, rs)
		if err != nil {
			return err
		}

		var v any
		switch t {
		case ggufTypeUint8:
			v, err = readGGUF[uint8](llm, rs)
		case ggufTypeInt8:
			v, err = readGGUF[int8](llm, rs)
		case ggufTypeUint16:
			v, err = readGGUF[uint16](llm, rs)
		case ggufTypeInt16:
			v, err = readGGUF[int16](llm, rs)
		case ggufTypeUint32:
			v, err = readGGUF[uint32](llm, rs)
		case ggufTypeInt32:
			v, err = readGGUF[int32](llm, rs)
		case ggufTypeUint64:
			v, err = readGGUF[uint64](llm, rs)
		case ggufTypeInt64:
			v, err = readGGUF[int64](llm, rs)
		case ggufTypeFloat32:
			v, err = readGGUF[float32](llm, rs)
		case ggufTypeFloat64:
			v, err = readGGUF[float64](llm, rs)
		case ggufTypeBool:
			v, err = readGGUF[bool](llm, rs)
		case ggufTypeString:
			v, err = readGGUFString(llm, rs)
		case ggufTypeArray:
			v, err = readGGUFArray(llm, rs)
		default:
			return fmt.Errorf("invalid type: %d", t)
		}

		if err != nil {
			return err
		}

		llm.kv[k] = v
	}

	// decode tensors
	for range llm.numTensor() {
		name, err := readGGUFString(llm, rs)
		if err != nil {
			return fmt.Errorf("failed to read tensor name: %w", err)
		}

		// dims is the number of dimensions in the tensor
		dims, err := readGGUF[uint32](llm, rs)
		if err != nil {
			return fmt.Errorf("failed to read tensor dimensions: %w", err)
		}

		shape := make([]uint64, dims)
		for i := 0; uint32(i) < dims; i++ {
			shape[i], err = readGGUF[uint64](llm, rs)
			if err != nil {
				return fmt.Errorf("failed to read tensor shape: %w", err)
			}
		}

		kind, err := readGGUF[uint32](llm, rs)
		if err != nil {
			return fmt.Errorf("failed to read tensor kind: %w", err)
		}

		offset, err := readGGUF[uint64](llm, rs)
		if err != nil {
			return fmt.Errorf("failed to read tensor offset: %w", err)
		}

		tensor := Tensor{
			Name:   name,
			Kind:   kind,
			Offset: offset,
			Shape:  shape[:],
		}

		llm.tensors = append(llm.tensors, &tensor)
		llm.parameters += tensor.parameters()
	}

	// patch KV with parameter count
	llm.kv["general.parameter_count"] = llm.parameters

	alignment, ok := llm.kv["general.alignment"].(uint32)
	if !ok {
		alignment = 32
	}

	offset, err := rs.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}

	padding := ggufPadding(offset, int64(alignment))
	llm.tensorOffset = uint64(offset + padding)

	for _, tensor := range llm.tensors {
		offset, err := rs.Seek(0, io.SeekCurrent)
		if err != nil {
			return fmt.Errorf("failed to get current offset: %w", err)
		}

		padding := ggufPadding(offset, int64(alignment))
		if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
			return fmt.Errorf("failed to seek to init padding: %w", err)
		}

		if _, err := rs.Seek(int64(tensor.Size()), io.SeekCurrent); err != nil {
			return fmt.Errorf("failed to seek to tensor: %w", err)
		}
	}

	return nil
}

func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
	var t T
	err := binary.Read(r, llm.ByteOrder, &t)
	return t, err
}

func writeGGUF[V any](w io.Writer, t uint32, v V) error {
	if err := binary.Write(w, binary.LittleEndian, t); err != nil {
		return err
	}

	return binary.Write(w, binary.LittleEndian, v)
}

func readGGUFV1String(llm *gguf, r io.Reader) (string, error) {
	var length uint64
	if err := binary.Read(r, llm.ByteOrder, &length); err != nil {
		return "", err
	}

	var b bytes.Buffer
	if _, err := io.CopyN(&b, r, int64(length)); err != nil {
		return "", err
	}

	// gguf v1 strings are null-terminated
	b.Truncate(b.Len() - 1)

	return b.String(), nil
}

func discardGGUFString(llm *gguf, r io.Reader) error {
	buf := llm.scratch[:8]
	_, err := io.ReadFull(r, buf)
	if err != nil {
		return err
	}

	size := int(llm.ByteOrder.Uint64(buf))
	for size > 0 {
		n, err := r.Read(llm.scratch[:min(size, cap(llm.scratch))])
		if err != nil {
			return err
		}

		size -= n
	}

	return nil
}

func readGGUFString(llm *gguf, r io.Reader) (string, error) {
	if llm.Version == 1 {
		return readGGUFV1String(llm, r)
	}

	buf := llm.scratch[:8]
	_, err := io.ReadFull(r, buf)
	if err != nil {
		return "", err
	}

	length := int(llm.ByteOrder.Uint64(buf))
	if length > len(llm.scratch) {
		buf = make([]byte, length)
	} else {
		buf = llm.scratch[:length]
	}
	clear(buf)

	_, err = io.ReadFull(r, buf)
	if err != nil {
		return "", err
	}

	return string(buf), nil
}

func writeGGUFString(w io.Writer, s string) error {
	if err := binary.Write(w, binary.LittleEndian, ggufTypeString); err != nil {
		return err
	}

	if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
		return err
	}

	_, err := io.Copy(w, strings.NewReader(s))
	return err
}

type array struct {
	size   int
	values []any
}

func (a *array) MarshalJSON() ([]byte, error) {
	return json.Marshal(a.values)
}

func readGGUFV1Array(llm *gguf, r io.Reader) (*array, error) {
	t, err := readGGUF[uint32](llm, r)
	if err != nil {
		return nil, err
	}

	n, err := readGGUF[uint32](llm, r)
	if err != nil {
		return nil, err
	}

	a := &array{size: int(n)}
	if llm.canCollectArray(int(n)) {
		a.values = make([]any, 0, int(n))
	}

	for i := range n {
		var e any
		switch t {
		case ggufTypeUint8:
			e, err = readGGUF[uint8](llm, r)
		case ggufTypeInt8:
			e, err = readGGUF[int8](llm, r)
		case ggufTypeUint16:
			e, err = readGGUF[uint16](llm, r)
		case ggufTypeInt16:
			e, err = readGGUF[int16](llm, r)
		case ggufTypeUint32:
			e, err = readGGUF[uint32](llm, r)
		case ggufTypeInt32:
			e, err = readGGUF[int32](llm, r)
		case ggufTypeUint64:
			e, err = readGGUF[uint64](llm, r)
		case ggufTypeInt64:
			e, err = readGGUF[int64](llm, r)
		case ggufTypeFloat32:
			e, err = readGGUF[float32](llm, r)
		case ggufTypeFloat64:
			e, err = readGGUF[float64](llm, r)
		case ggufTypeBool:
			e, err = readGGUF[bool](llm, r)
		case ggufTypeString:
			e, err = readGGUFV1String(llm, r)
		default:
			return nil, fmt.Errorf("invalid array type: %d", t)
		}
		if err != nil {
			return nil, err
		}

		if a.values != nil {
			a.values[i] = e
		}
	}

	return a, nil
}

func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
	if llm.Version == 1 {
		return readGGUFV1Array(llm, r)
	}

	t, err := readGGUF[uint32](llm, r)
	if err != nil {
		return nil, err
	}

	n, err := readGGUF[uint64](llm, r)
	if err != nil {
		return nil, err
	}

	a := &array{size: int(n)}
	if llm.canCollectArray(int(n)) {
		a.values = make([]any, int(n))
	}

	for i := range n {
		var e any
		switch t {
		case ggufTypeUint8:
			e, err = readGGUF[uint8](llm, r)
		case ggufTypeInt8:
			e, err = readGGUF[int8](llm, r)
		case ggufTypeUint16:
			e, err = readGGUF[uint16](llm, r)
		case ggufTypeInt16:
			e, err = readGGUF[int16](llm, r)
		case ggufTypeUint32:
			e, err = readGGUF[uint32](llm, r)
		case ggufTypeInt32:
			e, err = readGGUF[int32](llm, r)
		case ggufTypeUint64:
			e, err = readGGUF[uint64](llm, r)
		case ggufTypeInt64:
			e, err = readGGUF[int64](llm, r)
		case ggufTypeFloat32:
			e, err = readGGUF[float32](llm, r)
		case ggufTypeFloat64:
			e, err = readGGUF[float64](llm, r)
		case ggufTypeBool:
			e, err = readGGUF[bool](llm, r)
		case ggufTypeString:
			if a.values != nil {
				e, err = readGGUFString(llm, r)
			} else {
				err = discardGGUFString(llm, r)
			}
		default:
			return nil, fmt.Errorf("invalid array type: %d", t)
		}
		if err != nil {
			return nil, err
		}

		if a.values != nil {
			a.values[i] = e
		}
	}

	return a, nil
}

// writeGGUFArray writes a slice s of type E to the write with a gguf type of t
func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
	if err := binary.Write(w, binary.LittleEndian, ggufTypeArray); err != nil {
		return err
	}

	if err := binary.Write(w, binary.LittleEndian, t); err != nil {
		return err
	}

	if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
		return err
	}

	return binary.Write(w, binary.LittleEndian, s)
}

func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
	if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil {
		return err
	}

	if err := binary.Write(ws, binary.LittleEndian, uint32(3)); err != nil {
		return err
	}

	if err := binary.Write(ws, binary.LittleEndian, uint64(len(ts))); err != nil {
		return err
	}

	if err := binary.Write(ws, binary.LittleEndian, uint64(len(kv))); err != nil {
		return err
	}

	keys := maps.Keys(kv)
	slices.Sort(keys)

	for _, key := range keys {
		if err := ggufWriteKV(ws, key, kv[key]); err != nil {
			return err
		}
	}

	slices.SortFunc(ts, func(a, b Tensor) int {
		var i, j int
		if n, err := fmt.Sscanf(a.Name, "blk.%d", &i); err != nil || n != 1 {
			return cmp.Compare(a.Name, b.Name)
		} else if n, err := fmt.Sscanf(b.Name, "blk.%d", &j); err != nil || n != 1 {
			return cmp.Compare(a.Name, b.Name)
		}

		return cmp.Compare(i, j)
	})

	var s uint64
	for _, t := range ts {
		t.Offset = s
		if err := ggufWriteTensorInfo(ws, t); err != nil {
			return err
		}
		s += t.Size()
	}

	var alignment int64 = 32
	for _, t := range ts {
		if err := ggufWriteTensor(ws, t, alignment); err != nil {
			return err
		}
	}

	return nil
}

func ggufWriteKV(ws io.WriteSeeker, k string, v any) error {
	slog.Debug(k, "type", fmt.Sprintf("%T", v))
	if err := binary.Write(ws, binary.LittleEndian, uint64(len(k))); err != nil {
		return err
	}

	if err := binary.Write(ws, binary.LittleEndian, []byte(k)); err != nil {
		return err
	}

	var err error
	switch v := v.(type) {
	case uint32:
		err = writeGGUF(ws, ggufTypeUint32, v)
	case float32:
		err = writeGGUF(ws, ggufTypeFloat32, v)
	case bool:
		err = writeGGUF(ws, ggufTypeBool, v)
	case string:
		err = writeGGUFString(ws, v)
	case []int32:
		err = writeGGUFArray(ws, ggufTypeInt32, v)
	case []uint32:
		err = writeGGUFArray(ws, ggufTypeUint32, v)
	case []float32:
		err = writeGGUFArray(ws, ggufTypeFloat32, v)
	case []string:
		if err := binary.Write(ws, binary.LittleEndian, ggufTypeArray); err != nil {
			return err
		}

		if err := binary.Write(ws, binary.LittleEndian, ggufTypeString); err != nil {
			return err
		}

		if err := binary.Write(ws, binary.LittleEndian, uint64(len(v))); err != nil {
			return err
		}

		for _, e := range v {
			if err := binary.Write(ws, binary.LittleEndian, uint64(len(e))); err != nil {
				return err
			}

			if err := binary.Write(ws, binary.LittleEndian, []byte(e)); err != nil {
				return err
			}
		}
	default:
		return fmt.Errorf("improper type for '%s'", k)
	}

	return err
}

func ggufWriteTensorInfo(ws io.WriteSeeker, t Tensor) error {
	slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset)
	if err := binary.Write(ws, binary.LittleEndian, uint64(len(t.Name))); err != nil {
		return err
	}

	if err := binary.Write(ws, binary.LittleEndian, []byte(t.Name)); err != nil {
		return err
	}

	if err := binary.Write(ws, binary.LittleEndian, uint32(len(t.Shape))); err != nil {
		return err
	}

	for i := range len(t.Shape) {
		if err := binary.Write(ws, binary.LittleEndian, t.Shape[len(t.Shape)-i-1]); err != nil {
			return err
		}
	}

	if err := binary.Write(ws, binary.LittleEndian, t.Kind); err != nil {
		return err
	}

	return binary.Write(ws, binary.LittleEndian, t.Offset)
}

func ggufWriteTensor(ws io.WriteSeeker, t Tensor, alignment int64) error {
	offset, err := ws.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}

	if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(offset, alignment)))); err != nil {
		return err
	}

	_, err = t.WriteTo(ws)
	return err
}

func ggufPadding(offset, align int64) int64 {
	return (align - offset%align) % align
}
```
ollama/llm/llama.cpp/.clang-tidy (deleted, 100755 → 0)

```yaml
---
Checks: >
    bugprone-*,
    -bugprone-easily-swappable-parameters,
    -bugprone-implicit-widening-of-multiplication-result,
    -bugprone-misplaced-widening-cast,
    -bugprone-narrowing-conversions,
    readability-*,
    -readability-avoid-unconditional-preprocessor-if,
    -readability-function-cognitive-complexity,
    -readability-identifier-length,
    -readability-implicit-bool-conversion,
    -readability-magic-numbers,
    -readability-uppercase-literal-suffix,
    -readability-simplify-boolean-expr,
    clang-analyzer-*,
    -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
    performance-*,
    portability-*,
    misc-*,
    -misc-const-correctness,
    -misc-non-private-member-variables-in-classes,
    -misc-no-recursion,
FormatStyle: none
```
ollama/llm/llama.cpp/.devops/cloud-v-pipeline (deleted, 100755 → 0)

```groovy
node('x86_runner1'){ // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries
    stage('Cleanup'){
        cleanWs() // Cleaning previous CI build in workspace
    }
    stage('checkout repo'){
        retry(5){ // Retry if the cloning fails due to some reason
            checkout scm // Clone the repo on Runner
        }
    }
    stage('Compiling llama.cpp'){
        sh'''#!/bin/bash
            make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V
        '''
    }
    stage('Running llama.cpp'){
        sh'''#!/bin/bash
            module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
            qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
            cat llama_log.txt # Printing results
        '''
    }
}
```
ollama/llm/llama.cpp/.devops/full-cuda.Dockerfile (deleted, 100755 → 0)

```dockerfile
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV GGML_CUDA=1
# Enable cURL
ENV LLAMA_CURL=1

RUN make -j$(nproc)

ENTRYPOINT ["/app/.devops/tools.sh"]
```
ollama/llm/llama.cpp/.devops/full-rocm.Dockerfile (deleted, 100755 → 0)

```dockerfile
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6
# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV GGML_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

# Enable cURL
ENV LLAMA_CURL=1
RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev

RUN make -j$(nproc)

ENTRYPOINT ["/app/.devops/tools.sh"]
```
ollama/llm/llama.cpp/.devops/full.Dockerfile (deleted, 100755 → 0)

```dockerfile
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

ENV LLAMA_CURL=1

RUN make -j$(nproc)

ENV LC_ALL=C.utf8

ENTRYPOINT ["/app/.devops/tools.sh"]
```
ollama/llm/llama.cpp/.devops/llama-cli-cuda.Dockerfile (deleted, 100755 → 0)

```dockerfile
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV GGML_CUDA=1

RUN make -j$(nproc) llama-cli

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/llama-cli /llama-cli

ENTRYPOINT [ "/llama-cli" ]
```
ollama/llm/llama.cpp/.devops/llama-cli-intel.Dockerfile (deleted, 100755 → 0)

```dockerfile
ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF

RUN apt-get update && \
    apt-get install -y git

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with static libs" && \
    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \
    ${OPT_SYCL_F16} -DBUILD_SHARED_LIBS=OFF && \
    cmake --build build --config Release --target llama-cli

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime

COPY --from=build /app/build/bin/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
```
ollama/llm/llama.cpp/.devops/llama-cli-rocm.Dockerfile (deleted, 100755 → 0)

```dockerfile
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6
# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV GGML_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make -j$(nproc) llama-cli

ENTRYPOINT [ "/app/llama-cli" ]
```
ollama/llm/llama.cpp/.devops/llama-cli-vulkan.Dockerfile (deleted, 100755 → 0)

```dockerfile
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget libgomp1

# Install Vulkan SDK
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk

# Build it
WORKDIR /app
COPY . .
RUN cmake -B build -DGGML_VULKAN=1 && \
    cmake --build build --config Release --target llama-cli

# Clean up
WORKDIR /
RUN cp /app/build/bin/llama-cli /llama-cli && \
    rm -rf /app

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
```
ollama/llm/llama.cpp/.devops/llama-cli.Dockerfile (deleted, 100755 → 0)

```dockerfile
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

RUN make -j$(nproc) llama-cli

FROM ubuntu:$UBUNTU_VERSION AS runtime

RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
```
ollama/llm/llama.cpp/.devops/llama-cpp-cuda.srpm.spec (deleted, 100755 → 0)

```spec
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name: llama.cpp-cuda
Version: %( date "+%%Y%%m%%d" )
Release: 1%{?dist}
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
License: MIT
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires: coreutils make gcc-c++ git cuda-toolkit
Requires: cuda-toolkit
URL: https://github.com/ggerganov/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CPU inference for Meta's Lllama2 models using default options.

%prep
%setup -n llama.cpp-master

%build
make -j GGML_CUDA=1

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cuda-cli
%{_bindir}/llama-cuda-server
%{_bindir}/llama-cuda-simple
/usr/lib/systemd/system/llamacuda.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog
```
ollama/llm/llama.cpp/.devops/llama-cpp.srpm.spec (deleted, 100755 → 0)

```spec
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
#    In the meantime, YYYYMMDD format will be used.
# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name: llama.cpp
Version: %( date "+%%Y%%m%%d" )
Release: 1%{?dist}
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
License: MIT
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires: coreutils make gcc-c++ git libstdc++-devel
Requires: libstdc++
URL: https://github.com/ggerganov/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CPU inference for Meta's Lllama2 models using default options.
Models are not included in this package and must be downloaded separately.

%prep
%setup -n llama.cpp-master

%build
make -j

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cli
%{_bindir}/llama-server
%{_bindir}/llama-simple
/usr/lib/systemd/system/llama.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog
```
ollama/llm/llama.cpp/.devops/llama-server-cuda.Dockerfile (deleted, 100755 → 0)

```dockerfile
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential git libcurl4-openssl-dev

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV GGML_CUDA=1
# Enable cURL
ENV LLAMA_CURL=1

RUN make -j$(nproc) llama-server

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/llama-server /llama-server

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
```
ollama/llm/llama.cpp/.devops/llama-server-intel.Dockerfile (deleted, 100755 → 0)

```dockerfile
ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF

RUN apt-get update && \
    apt-get install -y git libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with dynamic libs" && \
    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
    cmake --build build --config Release --target llama-server

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev curl

COPY --from=build /app/build/bin/llama-server /llama-server

ENV LC_ALL=C.utf8

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
```
ollama/llm/llama.cpp/.devops/llama-server-rocm.Dockerfile (deleted, 100755 → 0)

```dockerfile
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6
# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV GGML_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

# Enable cURL
ENV LLAMA_CURL=1
RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev curl

RUN make -j$(nproc) llama-server

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
```
ollama/llm/llama.cpp/.devops/llama-server-vulkan.Dockerfile (deleted, 100755 → 0)

```dockerfile
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK and cURL
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk libcurl4-openssl-dev curl

# Build it
WORKDIR /app
COPY . .
RUN cmake -B build -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
    cmake --build build --config Release --target llama-server

# Clean up
WORKDIR /
RUN cp /app/build/bin/llama-server /llama-server && \
    rm -rf /app

ENV LC_ALL=C.utf8

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
```
ollama/llm/llama.cpp/.devops/llama-server.Dockerfile (deleted, 100755 → 0)

```dockerfile
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
    apt-get install -y build-essential git libcurl4-openssl-dev

WORKDIR /app

COPY . .

ENV LLAMA_CURL=1

RUN make -j$(nproc) llama-server

FROM ubuntu:$UBUNTU_VERSION AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/llama-server /llama-server

ENV LC_ALL=C.utf8

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
```
ollama/llm/llama.cpp/.devops/nix/apps.nix (deleted, 100755 → 0)

```nix
{
  perSystem =
    { config, lib, ... }:
    {
      apps =
        let
          inherit (config.packages) default;
          binaries = [
            "llama-cli"
            "llama-embedding"
            "llama-server"
            "llama-quantize"
          ];
          mkApp = name: {
            type = "app";
            program = "${default}/bin/${name}";
          };
        in
        lib.genAttrs binaries mkApp;
    };
}
```