Commit 9edc181e authored by Daniele Viti

Merge branch 'dev'

parents 7354e8eb d8aa1697
@@ -85,17 +85,29 @@ If you don't have Ollama installed yet, you can use the provided bash script for
For a CPU-only container:
```bash
./run-compose.sh
```
For a GPU-enabled container (this requires a GPU driver for Docker; it mostly works with NVIDIA, so follow the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)):
Warning! A GPU-enabled installation has only been tested on Linux with an NVIDIA GPU; full functionality is not guaranteed on Windows, macOS, or with a different GPU.
```bash
./run-compose.sh --enable-gpu
```
Note that both of the above commands use the latest production Docker image from the repository; to build the latest local version instead, append the `--build` parameter, for example:
```bash
./run-compose.sh --enable-gpu --build
```
### Installing Both Ollama and Ollama Web UI Using Docker Compose
To install using the Docker Compose script as a CPU-only installation, simply run this command:
```bash
docker compose up -d
```
For a GPU-enabled installation (provided you have installed the necessary GPU drivers and are using NVIDIA):
```bash
docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up -d
```
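If you want to see what such a GPU override typically contains, the sketch below uses the standard Compose device-reservation syntax. The service name `ollama` and the file name `gpu-override.yaml` are assumptions for illustration; the repository's own `docker-compose.gpu.yaml` may differ.
```yaml
# gpu-override.yaml (hypothetical override; the provided docker-compose.gpu.yaml may differ)
services:
  ollama:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1            # number of GPUs to reserve
              capabilities: [gpu]
```
You would apply it the same way as the provided file, e.g. `docker compose -f docker-compose.yaml -f gpu-override.yaml up -d`.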
### Installing Both Ollama and Ollama Web UI Using Kustomize
...
@@ -4,6 +4,7 @@ metadata:
  name: ollama-service
  namespace: {{ .Values.namespace }}
spec:
  type: {{ .Values.ollama.service.type }}
  selector:
    app: ollama
  ports:
...
@@ -19,15 +19,32 @@ spec:
        image: {{ .Values.ollama.image }}
        ports:
        - containerPort: {{ .Values.ollama.servicePort }}
        env:
        {{- if .Values.ollama.gpu.enabled }}
        - name: PATH
          value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
        - name: LD_LIBRARY_PATH
          value: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
        - name: NVIDIA_DRIVER_CAPABILITIES
          value: compute,utility
        {{- end}}
        {{- if .Values.ollama.resources }}
        resources: {{- toYaml .Values.ollama.resources | nindent 10 }}
        {{- end }}
        volumeMounts:
        - name: ollama-volume
          mountPath: /root/.ollama
        tty: true
      {{- with .Values.ollama.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      tolerations:
      {{- if .Values.ollama.gpu.enabled }}
      - key: nvidia.com/gpu
        operator: Exists
        effect: NoSchedule
      {{- end }}
  volumeClaimTemplates:
  - metadata:
      name: ollama-volume
@@ -35,4 +52,4 @@ spec:
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
          storage: {{ .Values.ollama.volumeSize }}
\ No newline at end of file
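To check how these conditionals expand before deploying, you can render the chart locally with `helm template`. A quick sketch, assuming the chart directory is `./kubernetes/helm` (adjust the path to wherever the chart lives in your checkout):
```bash
# Render the chart with GPU support enabled and inspect the generated env vars and toleration
helm template ollama ./kubernetes/helm --set ollama.gpu.enabled=true > rendered.yaml
grep -n "nvidia" rendered.yaml   # should show the NVIDIA env vars and the GPU toleration
```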
@@ -15,14 +15,24 @@ spec:
    spec:
      containers:
      - name: ollama-webui
        image: {{ .Values.webui.image }}
        ports:
        - containerPort: 8080
        {{- if .Values.webui.resources }}
        resources: {{- toYaml .Values.webui.resources | nindent 10 }}
        {{- end }}
        volumeMounts:
        - name: webui-volume
          mountPath: /app/backend/data
        env:
        - name: OLLAMA_API_BASE_URL
          value: "http://ollama-service.{{ .Values.namespace }}.svc.cluster.local:{{ .Values.ollama.servicePort }}/api"
        tty: true
      {{- with .Values.webui.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      volumes:
      - name: webui-volume
        persistentVolumeClaim:
          claimName: ollama-webui-pvc
\ No newline at end of file
{{- if .Values.webui.ingress.enabled }}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: ollama-webui-ingress
  namespace: {{ .Values.namespace }}
  {{- if .Values.webui.ingress.annotations }}
  annotations:
{{ toYaml .Values.webui.ingress.annotations | trimSuffix "\n" | indent 4 }}
  {{- end }}
spec:
  rules:
  - host: {{ .Values.webui.ingress.host }}
@@ -18,3 +20,4 @@ spec:
              name: ollama-webui-service
              port:
                number: {{ .Values.webui.servicePort }}
{{- end }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  labels:
    app: ollama-webui
  name: ollama-webui-pvc
  namespace: {{ .Values.namespace }}
spec:
  accessModes: [ "ReadWriteOnce" ]
  resources:
    requests:
      storage: {{ .Values.webui.volumeSize }}
\ No newline at end of file
@@ -4,7 +4,7 @@ metadata:
  name: ollama-webui-service
  namespace: {{ .Values.namespace }}
spec:
  type: {{ .Values.webui.service.type }} # Default: NodePort; use LoadBalancer if you're on a cloud that supports it
  selector:
    app: ollama-webui
  ports:
...
@@ -10,6 +10,12 @@ ollama:
      memory: "2Gi"
      nvidia.com/gpu: "0"
  volumeSize: 1Gi
  nodeSelector: {}
  tolerations: []
  service:
    type: ClusterIP
  gpu:
    enabled: false

webui:
  replicaCount: 1
@@ -20,4 +26,13 @@ webui:
      cpu: "500m"
      memory: "500Mi"
  ingress:
    enabled: true
    annotations:
      # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
      # nginx.ingress.kubernetes.io/rewrite-target: /
    host: ollama.minikube.local
  volumeSize: 1Gi
  nodeSelector: {}
  tolerations: []
  service:
    type: NodePort
\ No newline at end of file
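To schedule the Ollama StatefulSet onto a GPU node with these new values, you can keep the defaults above and supply a small override file at install time. A minimal sketch, assuming a hypothetical `gpu-values.yaml` and a node label of `nvidia.com/gpu.present: "true"` (your cluster's GPU node labels may differ):
```yaml
# gpu-values.yaml (hypothetical override file)
ollama:
  gpu:
    enabled: true                # adds the NVIDIA env vars and GPU toleration to the StatefulSet
  resources:
    limits:
      nvidia.com/gpu: "1"        # request one GPU from the device plugin
  nodeSelector:
    nvidia.com/gpu.present: "true"   # assumed label; check yours with `kubectl get nodes --show-labels`
```
Pass it with `-f gpu-values.yaml` when installing or upgrading the chart.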
@@ -80,12 +80,12 @@ usage() {
  echo "  -h, --help      Show this help message."
  echo ""
  echo "Examples:"
  echo "  $0 --drop"
  echo "  $0 --enable-gpu[count=1]"
  echo "  $0 --enable-api[port=11435]"
  echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
  echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
  echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data] --build"
  echo ""
  echo "This script configures and runs a docker-compose setup with optional GPU support, API exposure, and web UI configuration."
  echo "About the gpu to use, the script automatically detects it using the "lspci" command."
...