Commit 9edc181e authored by Daniele Viti

Merge branch 'dev'

parents 7354e8eb d8aa1697
@@ -85,17 +85,29 @@ If you don't have Ollama installed yet, you can use the provided bash script for
For cpu-only container
```bash
-chmod +x run-compose.sh && ./run-compose.sh
+./run-compose.sh
```
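The updated one-liner assumes `run-compose.sh` is already executable; on a fresh checkout you may still need to mark it once, as the removed line used to do inline:
```bash
chmod +x run-compose.sh
```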
-For gpu-enabled container (to enable this you must have your gpu driver for docker, it mostly works with nvidia so this is the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html))
+For a GPU-enabled container (this requires a GPU driver for Docker; it mostly works with NVIDIA, so see the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html))
+Warning! A GPU-enabled installation has only been tested on Linux with an NVIDIA GPU; full functionality is not guaranteed under Windows, macOS, or with a different GPU.
```bash
-chmod +x run-compose.sh && ./run-compose.sh --enable-gpu[count=1]
+./run-compose.sh --enable-gpu
```
Note that both of the above commands use the latest production Docker image from the repository. To build the latest local version instead, append the `--build` parameter, for example:
```bash
-./run-compose.sh --build --enable-gpu[count=1]
+./run-compose.sh --enable-gpu --build
```
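Before using the GPU variants, it can be worth confirming that Docker can actually see your GPU. A minimal check, assuming the NVIDIA Container Toolkit is set up (the CUDA image tag here is only an example):
```bash
# Should print the same table as running nvidia-smi on the host
docker run --rm --gpus all nvidia/cuda:12.3.2-base-ubuntu22.04 nvidia-smi
```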
+### Installing Both Ollama and Ollama Web UI Using Docker Compose
+To install with the Docker Compose script as a CPU-only installation, simply run:
+```bash
+docker compose up -d
+```
+For a GPU-enabled installation (provided you have installed the necessary GPU drivers and are using NVIDIA):
+```bash
+docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up -d
+```
### Installing Both Ollama and Ollama Web UI Using Kustomize
@@ -4,6 +4,7 @@ metadata:
  name: ollama-service
  namespace: {{ .Values.namespace }}
spec:
+  type: {{ .Values.ollama.service.type }}
  selector:
    app: ollama
  ports:
@@ -19,15 +19,32 @@ spec:
        image: {{ .Values.ollama.image }}
        ports:
        - containerPort: {{ .Values.ollama.servicePort }}
-        resources:
-          limits:
-            cpu: {{ .Values.ollama.resources.limits.cpu }}
-            memory: {{ .Values.ollama.resources.limits.memory }}
-            nvidia.com/gpu: {{ .Values.ollama.resources.limits.gpu }}
+        env:
+        {{- if .Values.ollama.gpu.enabled }}
+        - name: PATH
+          value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+        - name: LD_LIBRARY_PATH
+          value: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
+        - name: NVIDIA_DRIVER_CAPABILITIES
+          value: compute,utility
+        {{- end}}
+        {{- if .Values.ollama.resources }}
+        resources: {{- toYaml .Values.ollama.resources | nindent 10 }}
+        {{- end }}
        volumeMounts:
        - name: ollama-volume
          mountPath: /root/.ollama
        tty: true
+      {{- with .Values.ollama.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      tolerations:
+      {{- if .Values.ollama.gpu.enabled }}
+      - key: nvidia.com/gpu
+        operator: Exists
+        effect: NoSchedule
+      {{- end }}
  volumeClaimTemplates:
  - metadata:
      name: ollama-volume
@@ -35,4 +52,4 @@ spec:
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
-          storage: 1Gi
\ No newline at end of file
+          storage: {{ .Values.ollama.volumeSize }}
\ No newline at end of file
@@ -15,14 +15,24 @@ spec:
    spec:
      containers:
      - name: ollama-webui
-        image: ghcr.io/ollama-webui/ollama-webui:main
+        image: {{ .Values.webui.image }}
        ports:
        - containerPort: 8080
-        resources:
-          limits:
-            cpu: "500m"
-            memory: "500Mi"
+        {{- if .Values.webui.resources }}
+        resources: {{- toYaml .Values.webui.resources | nindent 10 }}
+        {{- end }}
+        volumeMounts:
+        - name: webui-volume
+          mountPath: /app/backend/data
        env:
        - name: OLLAMA_API_BASE_URL
          value: "http://ollama-service.{{ .Values.namespace }}.svc.cluster.local:{{ .Values.ollama.servicePort }}/api"
        tty: true
+      {{- with .Values.webui.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      volumes:
+      - name: webui-volume
+        persistentVolumeClaim:
+          claimName: ollama-webui-pvc
\ No newline at end of file
+{{- if .Values.webui.ingress.enabled }}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: ollama-webui-ingress
  namespace: {{ .Values.namespace }}
-  #annotations:
-    # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
-    # nginx.ingress.kubernetes.io/rewrite-target: /
+  {{- if .Values.webui.ingress.annotations }}
+  annotations:
+{{ toYaml .Values.webui.ingress.annotations | trimSuffix "\n" | indent 4 }}
+  {{- end }}
spec:
  rules:
  - host: {{ .Values.webui.ingress.host }}
@@ -18,3 +20,4 @@ spec:
            name: ollama-webui-service
            port:
              number: {{ .Values.webui.servicePort }}
+{{- end }}
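The chart's default Ingress host (see the values.yaml changes further down) is `ollama.minikube.local`; if you are testing on minikube, a minimal sketch for making that host resolve, assuming the minikube ingress addon and sudo access:
```bash
minikube addons enable ingress
echo "$(minikube ip) ollama.minikube.local" | sudo tee -a /etc/hosts
```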
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  labels:
+    app: ollama-webui
+  name: ollama-webui-pvc
+  namespace: {{ .Values.namespace }}
+spec:
+  accessModes: [ "ReadWriteOnce" ]
+  resources:
+    requests:
+      storage: {{ .Values.webui.volumeSize }}
\ No newline at end of file
@@ -4,7 +4,7 @@ metadata:
  name: ollama-webui-service
  namespace: {{ .Values.namespace }}
spec:
-  type: NodePort # Use LoadBalancer if you're on a cloud that supports it
+  type: {{ .Values.webui.service.type }} # Default NodePort; use LoadBalancer if you're on a cloud that supports it
  selector:
    app: ollama-webui
  ports:
@@ -10,6 +10,12 @@ ollama:
      memory: "2Gi"
      nvidia.com/gpu: "0"
  volumeSize: 1Gi
+  nodeSelector: {}
+  tolerations: []
+  service:
+    type: ClusterIP
+  gpu:
+    enabled: false
webui:
  replicaCount: 1
@@ -20,4 +26,13 @@ webui:
      cpu: "500m"
      memory: "500Mi"
  ingress:
+    enabled: true
+    annotations:
+      # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
+      # nginx.ingress.kubernetes.io/rewrite-target: /
    host: ollama.minikube.local
+  volumeSize: 1Gi
+  nodeSelector: {}
+  tolerations: []
+  service:
+    type: NodePort
\ No newline at end of file
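With these values in place, an install with overrides might look like the sketch below; the chart path, release name, and namespace value are assumptions, so adjust them to your checkout:
```bash
# Hypothetical chart path and release name. The manifests render their
# namespace from .Values.namespace (assumed here to be "ollama"), so the
# namespace must exist before installing.
kubectl create namespace ollama
helm install ollama-webui ./kubernetes/helm \
  --set ollama.gpu.enabled=true \
  --set webui.ingress.host=ollama.example.com
```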
@@ -80,12 +80,12 @@ usage() {
    echo "  -h, --help                 Show this help message."
    echo ""
    echo "Examples:"
-    echo "  ./$0 --drop"
-    echo "  ./$0 --enable-gpu[count=1]"
-    echo "  ./$0 --enable-api[port=11435]"
-    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
-    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
-    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data] --build"
+    echo "  $0 --drop"
+    echo "  $0 --enable-gpu[count=1]"
+    echo "  $0 --enable-api[port=11435]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data] --build"
    echo ""
    echo "This script configures and runs a docker-compose setup with optional GPU support, API exposure, and web UI configuration."
    echo "The GPU to use is detected automatically via the \"lspci\" command."