OpenDAS / ollama

Commit e2c3f6b3, authored Jul 03, 2024 by Michael Yang
Commit message: string
Parent commit: 8570c1c0

Showing 6 changed files with 85 additions and 84 deletions.
envconfig/config.go  +72 −71
gpu/amd_linux.go      +4  −4
gpu/amd_windows.go    +1  −1
gpu/assets.go         +3  −3
gpu/gpu.go            +4  −4
llm/server.go         +1  −1
envconfig/config.go

...

@@ -149,30 +149,77 @@ var (
 	IntelGPU = Bool("OLLAMA_INTEL_GPU")
 )
 
+func String(s string) func() string {
+	return func() string {
+		return getenv(s)
+	}
+}
+
+var (
+	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
+	TmpDir     = String("OLLAMA_TMPDIR")
+
+	CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
+	HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
+	RocrVisibleDevices    = String("ROCR_VISIBLE_DEVICES")
+	GpuDeviceOrdinal      = String("GPU_DEVICE_ORDINAL")
+	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
+)
+
+func RunnersDir() (p string) {
+	if p := getenv("OLLAMA_RUNNERS_DIR"); p != "" {
+		return p
+	}
+
+	if runtime.GOOS != "windows" {
+		return
+	}
+
+	defer func() {
+		if p == "" {
+			slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
+		}
+	}()
+
+	// On Windows we do not carry the payloads inside the main executable
+	exe, err := os.Executable()
+	if err != nil {
+		return
+	}
+
+	cwd, err := os.Getwd()
+	if err != nil {
+		return
+	}
+
+	var paths []string
+	for _, root := range []string{filepath.Dir(exe), cwd} {
+		paths = append(paths,
+			root,
+			filepath.Join(root, "windows-"+runtime.GOARCH),
+			filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
+		)
+	}
+
+	// Try a few variations to improve developer experience when building from source in the local tree
+	for _, path := range paths {
+		candidate := filepath.Join(path, "ollama_runners")
+		if _, err := os.Stat(candidate); err == nil {
+			p = candidate
+			break
+		}
+	}
+
+	return p
+}
+
-var (
-	// Set via OLLAMA_LLM_LIBRARY in the environment
-	LLMLibrary string
-	// Set via OLLAMA_MAX_LOADED_MODELS in the environment
-	MaxRunners int
-	// Set via OLLAMA_MAX_QUEUE in the environment
-	MaxQueuedRequests int
-	// Set via OLLAMA_NUM_PARALLEL in the environment
-	NumParallel int
-	// Set via OLLAMA_RUNNERS_DIR in the environment
-	RunnersDir string
-	// Set via OLLAMA_TMPDIR in the environment
-	TmpDir string
-	// Set via CUDA_VISIBLE_DEVICES in the environment
-	CudaVisibleDevices string
-	// Set via HIP_VISIBLE_DEVICES in the environment
-	HipVisibleDevices string
-	// Set via ROCR_VISIBLE_DEVICES in the environment
-	RocrVisibleDevices string
-	// Set via GPU_DEVICE_ORDINAL in the environment
-	GpuDeviceOrdinal string
-	// Set via HSA_OVERRIDE_GFX_VERSION in the environment
-	HsaOverrideGfxVersion string
-)
-
 type EnvVar struct {
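One detail worth noting in the new RunnersDir: it combines a named return value p with a deferred closure, so the "unable to locate llm runner directory" message is logged on any return path that leaves p empty, and because the defer is registered after the GOOS check, the warning can only fire on Windows. A stripped-down sketch of that idiom (lookup and EXAMPLE_DIR are illustrative stand-ins, not names from this commit):

```go
package main

import (
	"fmt"
	"os"
)

// lookup mirrors the RunnersDir shape: the deferred closure observes
// the final value of the named return p, whichever return set it.
func lookup() (p string) {
	defer func() {
		if p == "" {
			fmt.Fprintln(os.Stderr, "lookup failed; set EXAMPLE_DIR to override")
		}
	}()

	if v := os.Getenv("EXAMPLE_DIR"); v != "" {
		return v // p == v by the time the deferred closure runs
	}
	return // p stays "", so the deferred closure logs
}

func main() {
	fmt.Printf("got %q\n", lookup())
}
```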
...

@@ -187,7 +234,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
 		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
 		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
-		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
+		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
 		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models per GPU"},
 		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"},
 		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
...

@@ -195,16 +242,16 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_NOPRUNE":      {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
 		"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests"},
 		"OLLAMA_ORIGINS":      {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
-		"OLLAMA_RUNNERS_DIR":  {"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"},
+		"OLLAMA_RUNNERS_DIR":  {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
 		"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
-		"OLLAMA_TMPDIR":       {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
+		"OLLAMA_TMPDIR":       {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
 	}
 	if runtime.GOOS != "darwin" {
-		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices, "Set which NVIDIA devices are visible"}
-		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices, "Set which AMD devices are visible"}
-		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices, "Set which AMD devices are visible"}
-		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal, "Set which AMD devices are visible"}
-		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion, "Override the gfx used for all detected AMD GPUs"}
+		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
+		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
+		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
+		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
+		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
 		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
 	}
 	return ret
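With the getters in place, AsMap evaluates each setting while building the map, so the values it reports reflect the environment at the moment of the call rather than whatever LoadConfig captured at startup. A simplified sketch of that behavior (EnvVar and the getter are cut down from the real types):

```go
package main

import (
	"fmt"
	"os"
)

type EnvVar struct {
	Name        string
	Value       any
	Description string
}

var TmpDir = func() string { return os.Getenv("OLLAMA_TMPDIR") }

// AsMap-style construction: TmpDir() runs here, at call time, so the
// snapshot always matches the current environment.
func AsMap() map[string]EnvVar {
	return map[string]EnvVar{
		"OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
	}
}

func main() {
	os.Setenv("OLLAMA_TMPDIR", "/tmp/now")
	fmt.Println(AsMap()["OLLAMA_TMPDIR"].Value) // /tmp/now
}
```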
...

@@ -233,46 +280,6 @@ func init() {
 }
 
 func LoadConfig() {
-	RunnersDir = getenv("OLLAMA_RUNNERS_DIR")
-	if runtime.GOOS == "windows" && RunnersDir == "" {
-		// On Windows we do not carry the payloads inside the main executable
-		appExe, err := os.Executable()
-		if err != nil {
-			slog.Error("failed to lookup executable path", "error", err)
-		}
-
-		cwd, err := os.Getwd()
-		if err != nil {
-			slog.Error("failed to lookup working directory", "error", err)
-		}
-
-		var paths []string
-		for _, root := range []string{filepath.Dir(appExe), cwd} {
-			paths = append(paths,
-				root,
-				filepath.Join(root, "windows-"+runtime.GOARCH),
-				filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
-			)
-		}
-
-		// Try a few variations to improve developer experience when building from source in the local tree
-		for _, p := range paths {
-			candidate := filepath.Join(p, "ollama_runners")
-			_, err := os.Stat(candidate)
-			if err == nil {
-				RunnersDir = candidate
-				break
-			}
-		}
-		if RunnersDir == "" {
-			slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
-		}
-	}
-
-	TmpDir = getenv("OLLAMA_TMPDIR")
-	LLMLibrary = getenv("OLLAMA_LLM_LIBRARY")
-
 	if onp := getenv("OLLAMA_NUM_PARALLEL"); onp != "" {
 		val, err := strconv.Atoi(onp)
 		if err != nil {
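After the deletions above, LoadConfig is left with only the numeric settings (OLLAMA_NUM_PARALLEL, OLLAMA_MAX_LOADED_MODELS, OLLAMA_MAX_QUEUE), which still need strconv parsing. If the same closure treatment were later extended to integers, a helper might look like the following hypothetical sketch; it is not part of this commit:

```go
package envconfig_sketch

import (
	"os"
	"strconv"
)

// Int is a hypothetical companion to String: it parses the named
// environment variable on each call and falls back to a default when
// the value is unset or not a number. Not part of this commit.
func Int(s string, def int) func() int {
	return func() int {
		if v := os.Getenv(s); v != "" {
			if n, err := strconv.Atoi(v); err == nil {
				return n
			}
		}
		return def
	}
}

var NumParallel = Int("OLLAMA_NUM_PARALLEL", 0)
```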
...

@@ -300,10 +307,4 @@ func LoadConfig() {
 			MaxQueuedRequests = p
 		}
 	}
-
-	CudaVisibleDevices = getenv("CUDA_VISIBLE_DEVICES")
-	HipVisibleDevices = getenv("HIP_VISIBLE_DEVICES")
-	RocrVisibleDevices = getenv("ROCR_VISIBLE_DEVICES")
-	GpuDeviceOrdinal = getenv("GPU_DEVICE_ORDINAL")
-	HsaOverrideGfxVersion = getenv("HSA_OVERRIDE_GFX_VERSION")
 }
gpu/amd_linux.go

...

@@ -60,9 +60,9 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 	// Determine if the user has already pre-selected which GPUs to look at, then ignore the others
 	var visibleDevices []string
-	hipVD := envconfig.HipVisibleDevices   // zero based index only
-	rocrVD := envconfig.RocrVisibleDevices // zero based index or UUID, but consumer cards seem to not support UUID
-	gpuDO := envconfig.GpuDeviceOrdinal    // zero based index
+	hipVD := envconfig.HipVisibleDevices()   // zero based index only
+	rocrVD := envconfig.RocrVisibleDevices() // zero based index or UUID, but consumer cards seem to not support UUID
+	gpuDO := envconfig.GpuDeviceOrdinal()    // zero based index
 	switch {
 	// TODO is this priorty order right?
 	case hipVD != "":
...

@@ -75,7 +75,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 		visibleDevices = strings.Split(gpuDO, ",")
 	}
-	gfxOverride := envconfig.HsaOverrideGfxVersion
+	gfxOverride := envconfig.HsaOverrideGfxVersion()
 	var supported []string
 	libDir := ""
...

gpu/amd_windows.go

...

@@ -53,7 +53,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 	}
 	var supported []string
-	gfxOverride := envconfig.HsaOverrideGfxVersion
+	gfxOverride := envconfig.HsaOverrideGfxVersion()
 	if gfxOverride == "" {
 		supported, err = GetSupportedGFX(libDir)
 		if err != nil {
...

gpu/assets.go

...

@@ -26,7 +26,7 @@ func PayloadsDir() (string, error) {
 	defer lock.Unlock()
 	var err error
 	if payloadsDir == "" {
-		runnersDir := envconfig.RunnersDir
+		runnersDir := envconfig.RunnersDir()
 		if runnersDir != "" {
 			payloadsDir = runnersDir
...

@@ -35,7 +35,7 @@ func PayloadsDir() (string, error) {
 		// The remainder only applies on non-windows where we still carry payloads in the main executable
 		cleanupTmpDirs()
-		tmpDir := envconfig.TmpDir
+		tmpDir := envconfig.TmpDir()
 		if tmpDir == "" {
 			tmpDir, err = os.MkdirTemp("", "ollama")
 			if err != nil {
...

@@ -105,7 +105,7 @@ func cleanupTmpDirs() {
 func Cleanup() {
 	lock.Lock()
 	defer lock.Unlock()
-	runnersDir := envconfig.RunnersDir
+	runnersDir := envconfig.RunnersDir()
 	if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" {
 		// We want to fully clean up the tmpdir parent of the payloads dir
 		tmpDir := filepath.Clean(filepath.Join(payloadsDir, ".."))
...

gpu/gpu.go

...

@@ -230,8 +230,8 @@ func GetGPUInfo() GpuInfoList {
 		// On windows we bundle the nvidia library one level above the runner dir
 		depPath := ""
-		if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
-			depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "cuda")
+		if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" {
+			depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "cuda")
 		}
 
 		// Load ALL libraries
...

@@ -306,8 +306,8 @@ func GetGPUInfo() GpuInfoList {
 		oHandles = initOneAPIHandles()
 		// On windows we bundle the oneapi library one level above the runner dir
 		depPath = ""
-		if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
-			depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "oneapi")
+		if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" {
+			depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "oneapi")
 		}
 
 		for d := range oHandles.oneapi.num_drivers {
...

llm/server.go

...

@@ -163,7 +163,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	} else {
 		servers = serversForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant
 	}
-	demandLib := envconfig.LLMLibrary
+	demandLib := envconfig.LLMLibrary()
 	if demandLib != "" {
 		serverPath := availableServers[demandLib]
 		if serverPath == "" {
...