main.py 36.5 KB
Newer Older
Timothy J. Baek's avatar
Timothy J. Baek committed
1
2
3
4
5
6
7
8
from fastapi import (
    FastAPI,
    Request,
    HTTPException,
    Depends,
    UploadFile,
    File,
)
9
10
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
Timothy J. Baek's avatar
Timothy J. Baek committed
11

Timothy J. Baek's avatar
Timothy J. Baek committed
12
from pydantic import BaseModel, ConfigDict
13

Timothy J. Baek's avatar
Timothy J. Baek committed
14
import os
15
import re
16
import random
Timothy J. Baek's avatar
Timothy J. Baek committed
17
18
import requests
import json
19
20
import aiohttp
import asyncio
21
import logging
22
import time
Timothy J. Baek's avatar
Timothy J. Baek committed
23
24
25
from urllib.parse import urlparse
from typing import Optional, List, Union

26
27
from starlette.background import BackgroundTask

28
from apps.webui.models.models import Models
29
from constants import ERROR_MESSAGES
30
31
32
33
from utils.utils import (
    get_verified_user,
    get_admin_user,
)
34
from utils.task import prompt_template
Timothy J. Baek's avatar
Timothy J. Baek committed
35

Timothy J. Baek's avatar
Timothy J. Baek committed
36

Timothy J. Baek's avatar
Timothy J. Baek committed
37
38
39
from config import (
    SRC_LOG_LEVELS,
    OLLAMA_BASE_URLS,
Timothy J. Baek's avatar
Timothy J. Baek committed
40
    ENABLE_OLLAMA_API,
Timothy J. Baek's avatar
refac  
Timothy J. Baek committed
41
    AIOHTTP_CLIENT_TIMEOUT,
Timothy J. Baek's avatar
Timothy J. Baek committed
42
    ENABLE_MODEL_FILTER,
Timothy J. Baek's avatar
Timothy J. Baek committed
43
44
    MODEL_FILTER_LIST,
    UPLOAD_DIR,
45
    AppConfig,
Timothy J. Baek's avatar
Timothy J. Baek committed
46
)
Timothy J. Baek's avatar
refac  
Timothy J. Baek committed
47
from utils.misc import calculate_sha256, add_or_update_system_message
48

49
50
# Module-level logger; its level is taken from the "OLLAMA" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["OLLAMA"])
51

52
53
54
55
56
57
58
59
# FastAPI application that hosts the Ollama proxy routes defined in this module.
app = FastAPI()
# CORS is configured fully open: every origin, method and header is accepted
# and credentials are allowed.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive — confirm this is intended for the deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
Timothy J. Baek's avatar
Timothy J. Baek committed
60

61
# Runtime configuration holder; the settings below are read and updated through it.
app.state.config = AppConfig()
Timothy J. Baek's avatar
Timothy J. Baek committed
62

Timothy J. Baek's avatar
Timothy J. Baek committed
63
64
# Model filtering: when enabled, get_ollama_tags restricts users with the
# "user" role to the models named in MODEL_FILTER_LIST.
app.state.config.ENABLE_MODEL_FILTER = ENABLE_MODEL_FILTER
app.state.config.MODEL_FILTER_LIST = MODEL_FILTER_LIST
Timothy J. Baek's avatar
Timothy J. Baek committed
65
66

# Master switch: when falsy, get_all_models returns an empty model list and
# get_ollama_versions reports {"version": False}.
app.state.config.ENABLE_OLLAMA_API = ENABLE_OLLAMA_API
67
# Ordered list of backend base URLs; the `url_idx` values used throughout this
# module are indexes into this list.
app.state.config.OLLAMA_BASE_URLS = OLLAMA_BASE_URLS
68
# Cache of known models keyed by model["model"]; (re)built by get_all_models().
app.state.MODELS = {}
Timothy J. Baek's avatar
Timothy J. Baek committed
69
70


Timothy J. Baek's avatar
Timothy J. Baek committed
71
72
73
74
75
# TODO: Implement a more intelligent load balancing mechanism for distributing requests among multiple backend instances.
# The current implementation picks a backend uniformly at random (random.choice); it is not round-robin. Consider algorithms
# like weighted round-robin, least connections, or least response time for better resource utilization and performance.


76
77
78
79
80
81
82
83
84
85
86
@app.middleware("http")
async def check_url(request: Request, call_next):
    """Ensure the model cache has been loaded before a request is handled.

    When ``app.state.MODELS`` is empty the backends are queried through
    ``get_all_models()``; the request is then passed down the chain unchanged.

    Args:
        request: The incoming HTTP request.
        call_next: Callable that forwards the request to the next handler.

    Returns:
        The response produced by ``call_next``.
    """
    models_cached = len(app.state.MODELS) != 0
    if not models_cached:
        # The cache stays empty while no backend reports models, so this
        # lookup is repeated on every request until one does.
        await get_all_models()

    return await call_next(request)


Timothy J. Baek's avatar
Timothy J. Baek committed
87
88
89
90
91
92
@app.head("/")
@app.get("/")
async def get_status():
    """Liveness probe for this application; always reports ``status: True``."""
    status_payload = {"status": True}
    return status_payload


Timothy J. Baek's avatar
Timothy J. Baek committed
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
@app.get("/config")
async def get_config(user=Depends(get_admin_user)):
    """Return the current ``ENABLE_OLLAMA_API`` setting.

    Guarded by the ``get_admin_user`` dependency.
    """
    enabled = app.state.config.ENABLE_OLLAMA_API
    return {"ENABLE_OLLAMA_API": enabled}


class OllamaConfigForm(BaseModel):
    """Request body for ``POST /config/update``."""

    # New value for ENABLE_OLLAMA_API. The handler stores it as-is, so an
    # omitted field results in None being stored.
    enable_ollama_api: Optional[bool] = None


@app.post("/config/update")
async def update_config(form_data: OllamaConfigForm, user=Depends(get_admin_user)):
    """Overwrite the ``ENABLE_OLLAMA_API`` setting and echo the stored value.

    Guarded by the ``get_admin_user`` dependency.

    Args:
        form_data: Carries the new flag value; ``None`` is stored verbatim when
            the field is omitted.
        user: Resolved by the ``get_admin_user`` dependency.

    Returns:
        A dict with the value read back from the configuration.
    """
    config = app.state.config
    config.ENABLE_OLLAMA_API = form_data.enable_ollama_api
    # Read the value back rather than echoing the input.
    return {"ENABLE_OLLAMA_API": config.ENABLE_OLLAMA_API}


108
109
@app.get("/urls")
async def get_ollama_api_urls(user=Depends(get_admin_user)):
    """Return the configured list of backend base URLs.

    Guarded by the ``get_admin_user`` dependency.
    """
    base_urls = app.state.config.OLLAMA_BASE_URLS
    return {"OLLAMA_BASE_URLS": base_urls}
111

Timothy J. Baek's avatar
Timothy J. Baek committed
112

113
class UrlUpdateForm(BaseModel):
    """Request body for ``POST /urls/update``."""

    # Replacement list of backend base URLs (stored verbatim by the handler).
    urls: List[str]
115
116


117
@app.post("/urls/update")
async def update_ollama_api_url(form_data: UrlUpdateForm, user=Depends(get_admin_user)):
    """Replace the configured backend base URLs and echo the stored list.

    Guarded by the ``get_admin_user`` dependency. The model cache in
    ``app.state.MODELS`` is not touched here.

    Args:
        form_data: Carries the new list of base URLs.
        user: Resolved by the ``get_admin_user`` dependency.

    Returns:
        A dict with the list read back from the configuration.
    """
    config = app.state.config
    config.OLLAMA_BASE_URLS = form_data.urls

    log.info(f"app.state.config.OLLAMA_BASE_URLS: {app.state.config.OLLAMA_BASE_URLS}")
    return {"OLLAMA_BASE_URLS": config.OLLAMA_BASE_URLS}
Timothy J. Baek's avatar
Timothy J. Baek committed
123
124


125
async def fetch_url(url):
    """GET ``url`` and return its JSON body, or ``None`` if anything fails.

    A fresh client session with a 5 second total timeout is used per call.
    Every exception (connection failure, timeout, invalid JSON, ...) is logged
    and swallowed so that callers can fan out over several backends.

    Args:
        url: Absolute URL to fetch.

    Returns:
        The decoded JSON payload, or ``None`` on any error.
    """
    request_timeout = aiohttp.ClientTimeout(total=5)
    try:
        async with aiohttp.ClientSession(
            timeout=request_timeout, trust_env=True
        ) as session, session.get(url) as response:
            payload = await response.json()
            return payload
    except Exception as e:
        # Best effort: an unreachable backend must not fail the whole fan-out.
        log.error(f"Connection error: {e}")
        return None


137
138
139
140
141
142
143
144
145
146
async def cleanup_response(
    response: Optional[aiohttp.ClientResponse],
    session: Optional[aiohttp.ClientSession],
):
    """Close an aiohttp response and its session, skipping whichever is falsy.

    Used as the background task of streamed responses so the upstream
    connection is closed once the body has been sent.

    Args:
        response: Upstream response to close, if any.
        session: Client session to close, if any.
    """
    if response:
        response.close()
    if not session:
        return
    await session.close()


147
async def post_streaming_url(url: str, payload: str, stream: bool = True):
    """POST ``payload`` to ``url`` and relay the upstream response.

    Args:
        url: Absolute backend URL to post to.
        payload: Request body, sent as-is via ``data=``.
        stream: When true, the upstream body is relayed as a
            ``StreamingResponse`` and the connection is closed by a background
            task after the body has been sent. When false, the JSON body is
            read, the connection is closed, and the decoded object is returned.

    Returns:
        A ``StreamingResponse`` (``stream=True``) or the decoded JSON body.

    Raises:
        HTTPException: On any failure. Carries the upstream status when a
            response was received, otherwise 500.
    """
    r = None
    # Bound before the try block so the failure path can always clean up.
    session = None
    try:
        session = aiohttp.ClientSession(
            trust_env=True, timeout=aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT)
        )
        r = await session.post(url, data=payload)
        r.raise_for_status()

        if stream:
            # Ownership of the response/session moves to the background task.
            return StreamingResponse(
                r.content,
                status_code=r.status,
                headers=dict(r.headers),
                background=BackgroundTask(
                    cleanup_response, response=r, session=session
                ),
            )

        res = await r.json()
        await cleanup_response(r, session)
        return res

    except Exception as e:
        error_detail = "Open WebUI: Server Connection Error"
        status_code = 500
        if r is not None:
            status_code = r.status
            try:
                res = await r.json()
                if "error" in res:
                    error_detail = f"Ollama: {res['error']}"
            except Exception:
                error_detail = f"Ollama: {e}"

        # Previously the session and response were leaked on every failure.
        await cleanup_response(r, session)

        raise HTTPException(
            status_code=status_code,
            detail=error_detail,
        )


186
187
188
189
def merge_models_lists(model_lists):
    """Merge per-backend model lists into one list, one entry per model name.

    Entries are keyed by ``model["model"]``, the same key the model cache
    uses. Keying by digest (the earlier behaviour) collapsed distinct tags
    that share a digest, so those aliases disappeared from the merged list.

    Args:
        model_lists: Iterable of model lists, one per backend and in backend
            order. An element may be ``None`` for a backend that returned
            nothing; its position still counts as a backend index.

    Returns:
        A list of model dicts in first-seen order. Each is a shallow copy of
        the first occurrence with an added ``"urls"`` list holding the indexes
        of the backends that serve that model. Input dicts are not modified.
    """
    merged_models = {}

    for idx, model_list in enumerate(model_lists):
        if model_list is None:
            continue

        for model in model_list:
            model_id = model["model"]
            entry = merged_models.get(model_id)
            if entry is None:
                # Copy so the caller's dict is not mutated by adding "urls".
                merged_models[model_id] = {**model, "urls": [idx]}
            elif idx not in entry["urls"]:
                entry["urls"].append(idx)

    return list(merged_models.values())


async def get_all_models():
    """Query every configured backend for its models and refresh the cache.

    When ``ENABLE_OLLAMA_API`` is truthy, ``{url}/api/tags`` is fetched
    concurrently for every base URL and the results are merged with
    ``merge_models_lists``. Otherwise the model list is empty.

    A backend whose response is missing, falsy, not a JSON object, or lacks a
    ``"models"`` key contributes nothing instead of raising. This function
    runs from the HTTP middleware, so an exception here would fail every
    request.

    Returns:
        ``{"models": [...]}`` with the merged model list. As a side effect
        ``app.state.MODELS`` is rebuilt, keyed by ``model["model"]``.
    """
    log.info("get_all_models()")

    if app.state.config.ENABLE_OLLAMA_API:
        responses = await asyncio.gather(
            *(
                fetch_url(f"{url}/api/tags")
                for url in app.state.config.OLLAMA_BASE_URLS
            )
        )

        # Keep one slot per backend (None when unusable) so list positions
        # still line up with the indexes of OLLAMA_BASE_URLS.
        model_lists = [
            response.get("models") if isinstance(response, dict) else None
            for response in responses
        ]
        models = {"models": merge_models_lists(model_lists)}
    else:
        models = {"models": []}

    app.state.MODELS = {model["model"]: model for model in models["models"]}

    return models


@app.get("/api/tags")
@app.get("/api/tags/{url_idx}")
async def get_ollama_tags(
    url_idx: Optional[int] = None, user=Depends(get_verified_user)
):
    """List models, either merged across all backends or from one backend.

    Args:
        url_idx: Index into ``OLLAMA_BASE_URLS``. When ``None`` the merged
            list from ``get_all_models()`` is returned; if model filtering is
            enabled and the caller's role is ``"user"``, only models named in
            ``MODEL_FILTER_LIST`` are kept.
        user: Resolved by the ``get_verified_user`` dependency.

    Returns:
        The merged ``{"models": [...]}`` dict, or the selected backend's raw
        ``/api/tags`` JSON.

    Raises:
        HTTPException: When the single-backend request fails. Carries the
            upstream error status when one was received, otherwise 500.
    """
    if url_idx is None:
        models = await get_all_models()

        if app.state.config.ENABLE_MODEL_FILTER and user.role == "user":
            models["models"] = [
                model
                for model in models["models"]
                if model["name"] in app.state.config.MODEL_FILTER_LIST
            ]
        return models

    url = app.state.config.OLLAMA_BASE_URLS[url_idx]

    r = None
    try:
        r = requests.request(method="GET", url=f"{url}/api/tags")
        r.raise_for_status()

        return r.json()
    except Exception as e:
        log.exception(e)
        error_detail = "Open WebUI: Server Connection Error"
        if r is not None:
            try:
                res = r.json()
                if "error" in res:
                    error_detail = f"Ollama: {res['error']}"
            except Exception:
                error_detail = f"Ollama: {e}"

        # A requests.Response is falsy for 4xx/5xx, so `if r` always fell back
        # to 500 and hid the upstream status. Test for None explicitly, and
        # only propagate genuine error statuses.
        upstream_failed = r is not None and not r.ok
        raise HTTPException(
            status_code=r.status_code if upstream_failed else 500,
            detail=error_detail,
        )


@app.get("/api/version")
@app.get("/api/version/{url_idx}")
async def get_ollama_versions(url_idx: Optional[int] = None):
    """Report the backend version.

    Args:
        url_idx: Index into ``OLLAMA_BASE_URLS``. When ``None`` every backend
            is queried and the lowest reported version is returned.

    Returns:
        ``{"version": False}`` when the Ollama API is disabled,
        ``{"version": <lowest>}`` for the aggregate query, or the selected
        backend's raw ``/api/version`` JSON.

    Raises:
        HTTPException: 500 when no backend answered the aggregate query, or
            when the single-backend request fails (upstream error status when
            one was received, otherwise 500).
    """
    if not app.state.config.ENABLE_OLLAMA_API:
        return {"version": False}

    if url_idx is None:
        # returns lowest version
        responses = await asyncio.gather(
            *(
                fetch_url(f"{url}/api/version")
                for url in app.state.config.OLLAMA_BASE_URLS
            )
        )
        responses = [response for response in responses if response is not None]

        if not responses:
            raise HTTPException(
                status_code=500,
                detail=ERROR_MESSAGES.OLLAMA_NOT_FOUND,
            )

        # Compare numerically: drop a leading "v" and any "-suffix", then
        # split the remainder on dots.
        lowest_version = min(
            responses,
            key=lambda x: tuple(
                map(int, re.sub(r"^v|-.*", "", x["version"]).split("."))
            ),
        )
        return {"version": lowest_version["version"]}

    url = app.state.config.OLLAMA_BASE_URLS[url_idx]

    r = None
    try:
        r = requests.request(method="GET", url=f"{url}/api/version")
        r.raise_for_status()

        return r.json()
    except Exception as e:
        log.exception(e)
        error_detail = "Open WebUI: Server Connection Error"
        if r is not None:
            try:
                res = r.json()
                if "error" in res:
                    error_detail = f"Ollama: {res['error']}"
            except Exception:
                error_detail = f"Ollama: {e}"

        # A requests.Response is falsy for 4xx/5xx, so `if r` always fell back
        # to 500 and hid the upstream status. Test for None explicitly.
        upstream_failed = r is not None and not r.ok
        raise HTTPException(
            status_code=r.status_code if upstream_failed else 500,
            detail=error_detail,
        )
325
326
327
328
329
330
331
332
333
334
335


class ModelNameForm(BaseModel):
    """Request body that identifies a model by name (pull, delete, show)."""

    # Model name; handlers forward it to the backend and/or look it up in app.state.MODELS.
    name: str


@app.post("/api/pull")
@app.post("/api/pull/{url_idx}")
async def pull_model(
    form_data: ModelNameForm, url_idx: int = 0, user=Depends(get_admin_user)
):
    """Forward a model pull to a backend and relay its response.

    Guarded by the ``get_admin_user`` dependency.

    Args:
        form_data: Name of the model to pull.
        url_idx: Index into ``OLLAMA_BASE_URLS``; defaults to the first entry.
        user: Resolved by the ``get_admin_user`` dependency.

    Returns:
        Whatever ``post_streaming_url`` returns for ``{url}/api/pull``.
    """
    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
    log.info(f"url: {url}")

    # Admin should be able to pull models from any source
    payload = form_data.model_dump(exclude_none=True)
    payload["insecure"] = True

    return await post_streaming_url(f"{url}/api/pull", json.dumps(payload))
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357


class PushModelForm(BaseModel):
    """Request body for the push endpoint; forwarded to ``/api/push`` without None fields."""

    # Name of the model to push; also used to look up a backend when no url_idx is given.
    name: str
    # Optional flags forwarded to the backend unchanged when set.
    insecure: Optional[bool] = None
    stream: Optional[bool] = None


@app.delete("/api/push")
@app.delete("/api/push/{url_idx}")
async def push_model(
    form_data: PushModelForm,
    url_idx: Optional[int] = None,
    user=Depends(get_admin_user),
):
    """Forward a model push to a backend and relay its response.

    Guarded by the ``get_admin_user`` dependency.
    NOTE(review): the route is registered with the DELETE method — confirm
    this is intended for a push operation.

    Args:
        form_data: Push parameters; ``None`` fields are omitted from the
            forwarded JSON.
        url_idx: Index into ``OLLAMA_BASE_URLS``. When ``None`` the first
            backend recorded for the model in ``app.state.MODELS`` is used.
        user: Resolved by the ``get_admin_user`` dependency.

    Returns:
        Whatever ``post_streaming_url`` returns for ``{url}/api/push``.

    Raises:
        HTTPException: 400 when no ``url_idx`` is given and the model is not
            in the cache.
    """
    if url_idx is None:
        if form_data.name not in app.state.MODELS:
            raise HTTPException(
                status_code=400,
                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.name),
            )
        url_idx = app.state.MODELS[form_data.name]["urls"][0]

    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
    log.debug(f"url: {url}")

    body = form_data.model_dump_json(exclude_none=True).encode()
    return await post_streaming_url(f"{url}/api/push", body)
373
374
375
376
377
378
379
380
381
382
383
384
385
386


class CreateModelForm(BaseModel):
    """Request body for the create endpoint; forwarded to ``/api/create`` without None fields."""

    # Name of the model to create.
    name: str
    # Optional fields forwarded to the backend unchanged when set.
    modelfile: Optional[str] = None
    stream: Optional[bool] = None
    path: Optional[str] = None


@app.post("/api/create")
@app.post("/api/create/{url_idx}")
async def create_model(
    form_data: CreateModelForm, url_idx: int = 0, user=Depends(get_admin_user)
):
    """Forward a model creation request to a backend and relay its response.

    Guarded by the ``get_admin_user`` dependency.

    Args:
        form_data: Creation parameters; ``None`` fields are omitted from the
            forwarded JSON.
        url_idx: Index into ``OLLAMA_BASE_URLS``; defaults to the first entry.
        user: Resolved by the ``get_admin_user`` dependency.

    Returns:
        Whatever ``post_streaming_url`` returns for ``{url}/api/create``.
    """
    log.debug(f"form_data: {form_data}")
    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
    log.info(f"url: {url}")

    body = form_data.model_dump_json(exclude_none=True).encode()
    return await post_streaming_url(f"{url}/api/create", body)
394
395
396
397
398
399
400
401
402
403
404
405
406
407


class CopyModelForm(BaseModel):
    """Request body for the copy endpoint; forwarded to ``/api/copy``."""

    # Existing model name; also used to look up a backend when no url_idx is given.
    source: str
    # Name for the copy.
    destination: str


@app.post("/api/copy")
@app.post("/api/copy/{url_idx}")
async def copy_model(
    form_data: CopyModelForm,
    url_idx: Optional[int] = None,
    user=Depends(get_admin_user),
):
    """Forward a model copy request to a backend.

    Guarded by the ``get_admin_user`` dependency.

    Args:
        form_data: Source and destination model names.
        url_idx: Index into ``OLLAMA_BASE_URLS``. When ``None`` the first
            backend recorded for the source model in ``app.state.MODELS`` is
            used.
        user: Resolved by the ``get_admin_user`` dependency.

    Returns:
        ``True`` when the backend reports success.

    Raises:
        HTTPException: 400 when the source model is unknown and no ``url_idx``
            was given; otherwise the upstream error status, or 500 when the
            backend could not be reached.
    """
    if url_idx is None:
        if form_data.source not in app.state.MODELS:
            raise HTTPException(
                status_code=400,
                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.source),
            )
        url_idx = app.state.MODELS[form_data.source]["urls"][0]

    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
    log.info(f"url: {url}")

    r = None
    try:
        # Issue the request inside the try block so connection failures are
        # reported through the same HTTPException path as upstream errors.
        r = requests.request(
            method="POST",
            url=f"{url}/api/copy",
            data=form_data.model_dump_json(exclude_none=True).encode(),
        )
        r.raise_for_status()

        log.debug(f"r.text: {r.text}")

        return True
    except Exception as e:
        log.exception(e)
        error_detail = "Open WebUI: Server Connection Error"
        if r is not None:
            try:
                res = r.json()
                if "error" in res:
                    error_detail = f"Ollama: {res['error']}"
            except Exception:
                error_detail = f"Ollama: {e}"

        # A requests.Response is falsy for 4xx/5xx, so `if r` always fell back
        # to 500 and hid the upstream status. Test for None explicitly.
        upstream_failed = r is not None and not r.ok
        raise HTTPException(
            status_code=r.status_code if upstream_failed else 500,
            detail=error_detail,
        )


@app.delete("/api/delete")
@app.delete("/api/delete/{url_idx}")
async def delete_model(
    form_data: ModelNameForm,
    url_idx: Optional[int] = None,
    user=Depends(get_admin_user),
):
    """Forward a model deletion request to a backend.

    Guarded by the ``get_admin_user`` dependency.

    Args:
        form_data: Name of the model to delete.
        url_idx: Index into ``OLLAMA_BASE_URLS``. When ``None`` the first
            backend recorded for the model in ``app.state.MODELS`` is used.
        user: Resolved by the ``get_admin_user`` dependency.

    Returns:
        ``True`` when the backend reports success.

    Raises:
        HTTPException: 400 when the model is unknown and no ``url_idx`` was
            given; otherwise the upstream error status, or 500 when the
            backend could not be reached.
    """
    if url_idx is None:
        if form_data.name not in app.state.MODELS:
            raise HTTPException(
                status_code=400,
                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.name),
            )
        url_idx = app.state.MODELS[form_data.name]["urls"][0]

    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
    log.info(f"url: {url}")

    r = None
    try:
        # Issue the request inside the try block so connection failures are
        # reported through the same HTTPException path as upstream errors.
        r = requests.request(
            method="DELETE",
            url=f"{url}/api/delete",
            data=form_data.model_dump_json(exclude_none=True).encode(),
        )
        r.raise_for_status()

        log.debug(f"r.text: {r.text}")

        return True
    except Exception as e:
        log.exception(e)
        error_detail = "Open WebUI: Server Connection Error"
        if r is not None:
            try:
                res = r.json()
                if "error" in res:
                    error_detail = f"Ollama: {res['error']}"
            except Exception:
                error_detail = f"Ollama: {e}"

        # A requests.Response is falsy for 4xx/5xx, so `if r` always fell back
        # to 500 and hid the upstream status. Test for None explicitly.
        upstream_failed = r is not None and not r.ok
        raise HTTPException(
            status_code=r.status_code if upstream_failed else 500,
            detail=error_detail,
        )


@app.post("/api/show")
async def show_model_info(form_data: ModelNameForm, user=Depends(get_verified_user)):
    """Return a backend's ``/api/show`` JSON for a cached model.

    One of the backends recorded for the model is chosen at random.

    Args:
        form_data: Name of the model to describe.
        user: Resolved by the ``get_verified_user`` dependency.

    Returns:
        The backend's decoded JSON response.

    Raises:
        HTTPException: 400 when the model is not in ``app.state.MODELS``;
            otherwise the upstream error status, or 500 when the backend
            could not be reached.
    """
    if form_data.name not in app.state.MODELS:
        raise HTTPException(
            status_code=400,
            detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.name),
        )

    url_idx = random.choice(app.state.MODELS[form_data.name]["urls"])
    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
    log.info(f"url: {url}")

    r = None
    try:
        # Issue the request inside the try block so connection failures are
        # reported through the same HTTPException path as upstream errors.
        r = requests.request(
            method="POST",
            url=f"{url}/api/show",
            data=form_data.model_dump_json(exclude_none=True).encode(),
        )
        r.raise_for_status()

        return r.json()
    except Exception as e:
        log.exception(e)
        error_detail = "Open WebUI: Server Connection Error"
        if r is not None:
            try:
                res = r.json()
                if "error" in res:
                    error_detail = f"Ollama: {res['error']}"
            except Exception:
                error_detail = f"Ollama: {e}"

        # A requests.Response is falsy for 4xx/5xx, so `if r` always fell back
        # to 500 and hid the upstream status. Test for None explicitly.
        upstream_failed = r is not None and not r.ok
        raise HTTPException(
            status_code=r.status_code if upstream_failed else 500,
            detail=error_detail,
        )


class GenerateEmbeddingsForm(BaseModel):
    """Request body for the embeddings endpoints; forwarded to ``/api/embeddings`` without None fields."""

    # Model name; ":latest" is appended for the cache lookup when no tag is given.
    model: str
    # Text to embed.
    prompt: str
    # Optional fields forwarded to the backend unchanged when set.
    options: Optional[dict] = None
    keep_alive: Optional[Union[int, str]] = None


@app.post("/api/embeddings")
@app.post("/api/embeddings/{url_idx}")
async def generate_embeddings(
    form_data: GenerateEmbeddingsForm,
    url_idx: Optional[int] = None,
    user=Depends(get_verified_user),
):
    """Forward an embeddings request to a backend and return its JSON.

    Args:
        form_data: Embedding parameters; ``None`` fields are omitted from the
            forwarded JSON.
        url_idx: Index into ``OLLAMA_BASE_URLS``. When ``None`` a backend is
            chosen at random among those recorded for the model (``:latest``
            is assumed when the name has no tag).
        user: Resolved by the ``get_verified_user`` dependency.

    Returns:
        The backend's decoded JSON response.

    Raises:
        HTTPException: 400 when the model is unknown and no ``url_idx`` was
            given; otherwise the upstream error status, or 500 when the
            backend could not be reached.
    """
    if url_idx is None:
        model = form_data.model

        if ":" not in model:
            model = f"{model}:latest"

        if model not in app.state.MODELS:
            raise HTTPException(
                status_code=400,
                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
            )
        url_idx = random.choice(app.state.MODELS[model]["urls"])

    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
    log.info(f"url: {url}")

    r = None
    try:
        # Issue the request inside the try block so connection failures are
        # reported through the same HTTPException path as upstream errors.
        r = requests.request(
            method="POST",
            url=f"{url}/api/embeddings",
            data=form_data.model_dump_json(exclude_none=True).encode(),
        )
        r.raise_for_status()

        return r.json()
    except Exception as e:
        log.exception(e)
        error_detail = "Open WebUI: Server Connection Error"
        if r is not None:
            try:
                res = r.json()
                if "error" in res:
                    error_detail = f"Ollama: {res['error']}"
            except Exception:
                error_detail = f"Ollama: {e}"

        # A requests.Response is falsy for 4xx/5xx, so `if r` always fell back
        # to 500 and hid the upstream status. Test for None explicitly.
        upstream_failed = r is not None and not r.ok
        raise HTTPException(
            status_code=r.status_code if upstream_failed else 500,
            detail=error_detail,
        )


590
591
592
593
def generate_ollama_embeddings(
    form_data: GenerateEmbeddingsForm,
    url_idx: Optional[int] = None,
):
    """Request an embedding from a backend and return the vector itself.

    Synchronous counterpart of the ``/api/embeddings`` route for in-process
    callers.

    Args:
        form_data: Embedding parameters; ``None`` fields are omitted from the
            forwarded JSON.
        url_idx: Index into ``OLLAMA_BASE_URLS``. When ``None`` a backend is
            chosen at random among those recorded for the model (``:latest``
            is assumed when the name has no tag).

    Returns:
        The value of the ``"embedding"`` key of the backend's response.

    Raises:
        HTTPException: 400 when the model is unknown and no ``url_idx`` was
            given.
        Exception: When the request fails or the response has no
            ``"embedding"`` key; the message carries the error detail.
    """
    log.info(f"generate_ollama_embeddings {form_data}")

    if url_idx is None:
        model = form_data.model

        if ":" not in model:
            model = f"{model}:latest"

        if model not in app.state.MODELS:
            raise HTTPException(
                status_code=400,
                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
            )
        url_idx = random.choice(app.state.MODELS[model]["urls"])

    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
    log.info(f"url: {url}")

    r = None
    try:
        # Issue the request inside the try block so connection failures get
        # the same error-detail treatment as upstream errors.
        r = requests.request(
            method="POST",
            url=f"{url}/api/embeddings",
            data=form_data.model_dump_json(exclude_none=True).encode(),
        )
        r.raise_for_status()

        data = r.json()

        log.info(f"generate_ollama_embeddings {data}")

        if "embedding" in data:
            return data["embedding"]
        raise Exception("Something went wrong :/")
    except Exception as e:
        log.exception(e)
        error_detail = "Open WebUI: Server Connection Error"
        if r is not None:
            try:
                res = r.json()
                if "error" in res:
                    error_detail = f"Ollama: {res['error']}"
            except Exception:
                error_detail = f"Ollama: {e}"

        # Chain the original exception so its traceback is preserved.
        raise Exception(error_detail) from e
641
642


643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
class GenerateCompletionForm(BaseModel):
    """Request body for the generate endpoint; forwarded to ``/api/generate`` without None fields."""

    # Model name; ":latest" is appended for the cache lookup when no tag is given.
    model: str
    prompt: str
    images: Optional[List[str]] = None
    format: Optional[str] = None
    options: Optional[dict] = None
    system: Optional[str] = None
    template: Optional[str] = None
    # NOTE(review): typed as a string here — confirm this matches the type the backend expects.
    context: Optional[str] = None
    # Defaults to True, unlike the other optional fields, so it is always forwarded.
    stream: Optional[bool] = True
    raw: Optional[bool] = None
    keep_alive: Optional[Union[int, str]] = None


@app.post("/api/generate")
@app.post("/api/generate/{url_idx}")
async def generate_completion(
    form_data: GenerateCompletionForm,
    url_idx: Optional[int] = None,
    user=Depends(get_verified_user),
):
    """Forward a completion request to a backend and relay its response.

    Args:
        form_data: Generation parameters; ``None`` fields are omitted from the
            forwarded JSON.
        url_idx: Index into ``OLLAMA_BASE_URLS``. When ``None`` a backend is
            chosen at random among those recorded for the model (``:latest``
            is assumed when the name has no tag).
        user: Resolved by the ``get_verified_user`` dependency.

    Returns:
        Whatever ``post_streaming_url`` returns for ``{url}/api/generate``.

    Raises:
        HTTPException: 400 when the model is unknown and no ``url_idx`` was
            given.
    """
    if url_idx is None:
        requested = form_data.model
        lookup_name = requested if ":" in requested else f"{requested}:latest"

        if lookup_name not in app.state.MODELS:
            raise HTTPException(
                status_code=400,
                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
            )
        url_idx = random.choice(app.state.MODELS[lookup_name]["urls"])

    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
    log.info(f"url: {url}")

    body = form_data.model_dump_json(exclude_none=True).encode()
    return await post_streaming_url(f"{url}/api/generate", body)
684
685
686
687
688
689
690
691
692
693
694
695
696
697


class ChatMessage(BaseModel):
    """A single message of a chat request (element of ``GenerateChatCompletionForm.messages``)."""

    role: str
    content: str
    # Optional list of image strings attached to the message.
    images: Optional[List[str]] = None


class GenerateChatCompletionForm(BaseModel):
    """Request body for the chat endpoint (``/api/chat``)."""

    # Model identifier and the conversation so far.
    model: str
    messages: List[ChatMessage]

    # Optional settings; each stays None when omitted.
    format: Optional[str] = None
    options: Optional[dict] = None
    template: Optional[str] = None
    stream: Optional[bool] = None
    keep_alive: Optional[Union[int, str]] = None


def _apply_model_params_to_ollama_options(params: dict, options: dict) -> dict:
    """Copy stored model params into ``options`` without overriding the caller.

    A param is applied only when it is set (``is not None``) on the model and
    the corresponding Ollama option is still unset in ``options``. ``options``
    is modified in place and also returned.
    """
    # (model param name, Ollama option name). Two params are stored under
    # OpenAI-style names and are forwarded under Ollama's names.
    option_map = (
        ("mirostat", "mirostat"),
        ("mirostat_eta", "mirostat_eta"),
        ("mirostat_tau", "mirostat_tau"),
        ("num_ctx", "num_ctx"),
        ("num_batch", "num_batch"),
        ("num_keep", "num_keep"),
        ("repeat_last_n", "repeat_last_n"),
        ("frequency_penalty", "repeat_penalty"),
        ("temperature", "temperature"),
        ("seed", "seed"),
        ("tfs_z", "tfs_z"),
        ("max_tokens", "num_predict"),
        ("top_k", "top_k"),
        ("top_p", "top_p"),
        ("min_p", "min_p"),
        ("use_mmap", "use_mmap"),
        ("use_mlock", "use_mlock"),
        ("num_thread", "num_thread"),
    )

    for param_key, option_key in option_map:
        value = params.get(param_key)
        # `is not None` (not truthiness) so that meaningful falsy values such
        # as use_mmap=False or top_k=0 are still forwarded. The "already set"
        # check looks at the *destination* key so a caller-supplied
        # repeat_penalty / num_predict is never overwritten.
        if value is not None and options.get(option_key) is None:
            options[option_key] = value

    stop = params.get("stop")
    if stop and options.get("stop") is None:
        # Stop sequences are stored with literal escapes (e.g. "\\n");
        # decode them into the real characters before sending.
        options["stop"] = [
            bytes(sequence, "utf-8").decode("unicode_escape") for sequence in stop
        ]

    return options


@app.post("/api/chat")
@app.post("/api/chat/{url_idx}")
async def generate_chat_completion(
    form_data: GenerateChatCompletionForm,
    url_idx: Optional[int] = None,
    user=Depends(get_verified_user),
):
    """Forward a chat request to an Ollama backend's ``/api/chat`` endpoint.

    The request body is dumped to a plain dict. If ``Models.get_model_by_id``
    returns a record for the requested model, then:

    * its ``base_model_id`` (when set) replaces the model name,
    * its stored params fill in any Ollama ``options`` the caller left unset,
    * its ``system`` param is rendered with ``prompt_template`` and merged
      into the messages via ``add_or_update_system_message``.

    Args:
        form_data: Validated chat request body.
        url_idx: Index into ``app.state.config.OLLAMA_BASE_URLS``. When
            omitted, one of the URL indices registered for the model in
            ``app.state.MODELS`` is picked at random.
        user: Injected by ``get_verified_user``; its ``name`` and
            ``info["location"]`` are passed to the system prompt template.

    Returns:
        Whatever ``post_streaming_url`` returns for the upstream call.

    Raises:
        HTTPException: 400 when ``url_idx`` is omitted and the (possibly
            remapped) model is not present in ``app.state.MODELS``.
    """
    log.debug(
        "form_data.model_dump_json(exclude_none=True).encode(): {0} ".format(
            form_data.model_dump_json(exclude_none=True).encode()
        )
    )

    payload = {
        **form_data.model_dump(exclude_none=True, exclude=["metadata"]),
    }
    # Belt and braces: never forward "metadata" to Ollama.
    payload.pop("metadata", None)

    model_id = form_data.model
    model_info = Models.get_model_by_id(model_id)

    if model_info:
        if model_info.base_model_id:
            payload["model"] = model_info.base_model_id

        params = model_info.params.model_dump()

        if params:
            if payload.get("options") is None:
                payload["options"] = {}

            _apply_model_params_to_ollama_options(params, payload["options"])

        system = params.get("system", None)
        if system:
            template_vars = {}
            if user:
                template_vars = {
                    "user_name": user.name,
                    "user_location": (
                        user.info.get("location") if user.info else None
                    ),
                }
            system = prompt_template(system, **template_vars)

            if payload.get("messages"):
                payload["messages"] = add_or_update_system_message(
                    system, payload["messages"]
                )

    if url_idx is None:
        # Registry keys carry a tag, so a bare model name becomes "<name>:latest".
        if ":" not in payload["model"]:
            payload["model"] = f"{payload['model']}:latest"

        if payload["model"] not in app.state.MODELS:
            raise HTTPException(
                status_code=400,
                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
            )

        url_idx = random.choice(app.state.MODELS[payload["model"]]["urls"])

    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
    log.info(f"url: {url}")
    log.debug(payload)

    return await post_streaming_url(f"{url}/api/chat", json.dumps(payload))
Timothy J. Baek's avatar
Timothy J. Baek committed
880
881
882


# TODO: we should update this part once Ollama supports other types
883
884
885
886
887
# Structured (non-string) content of an OpenAI-style chat message. Only the
# discriminating `type` is declared; any other keys are accepted and kept
# because extra fields are allowed.
class OpenAIChatMessageContent(BaseModel):
    # Kind of content part (presumably e.g. "text" — not validated here).
    type: str
    model_config = ConfigDict(extra="allow")


Timothy J. Baek's avatar
Timothy J. Baek committed
888
889
# One entry of OpenAIChatCompletionForm.messages. Extra keys sent by the
# client are accepted and kept because extra fields are allowed.
class OpenAIChatMessage(BaseModel):
    # Speaker of the message; generate_openai_chat_completion looks for "system".
    role: str
    # Either plain text or a single structured content object.
    content: Union[str, OpenAIChatMessageContent]

    model_config = ConfigDict(extra="allow")


# Shape used by generate_openai_chat_completion to validate the raw request
# dict. Only `model` and `messages` are required; every other key the client
# sends is accepted and kept because extra fields are allowed.
class OpenAIChatCompletionForm(BaseModel):
    # Requested model name (may be a locally registered model id).
    model: str
    # Conversation messages.
    messages: List[OpenAIChatMessage]

    model_config = ConfigDict(extra="allow")


@app.post("/v1/chat/completions")
@app.post("/v1/chat/completions/{url_idx}")
async def generate_openai_chat_completion(
    form_data: dict,
    url_idx: Optional[int] = None,
    user=Depends(get_verified_user),
):
    """Forward an OpenAI-style chat request to Ollama's ``/v1/chat/completions``.

    The raw body is validated through ``OpenAIChatCompletionForm`` and dumped
    back to a dict. If ``Models.get_model_by_id`` returns a record for the
    requested model, then:

    * its ``base_model_id`` (when set) replaces the model name,
    * every sampling param that is actually set on the model overrides the
      value from the request,
    * its ``system`` param is rendered with ``prompt_template`` and prepended
      to the first existing system message, or inserted as a new first
      message when there is none.

    Args:
        form_data: Raw JSON body of the request.
        url_idx: Index into ``app.state.config.OLLAMA_BASE_URLS``. When
            omitted, one of the URL indices registered for the model in
            ``app.state.MODELS`` is picked at random.
        user: Injected by ``get_verified_user``; its ``name`` and
            ``info["location"]`` are passed to the system prompt template.

    Returns:
        Whatever ``post_streaming_url`` returns for the upstream call.

    Raises:
        HTTPException: 400 when ``url_idx`` is omitted and the (possibly
            remapped) model is not present in ``app.state.MODELS``.
    """
    form_data = OpenAIChatCompletionForm(**form_data)
    payload = {**form_data.model_dump(exclude_none=True, exclude=["metadata"])}
    # Belt and braces: never forward "metadata" upstream.
    payload.pop("metadata", None)

    model_id = form_data.model
    model_info = Models.get_model_by_id(model_id)

    if model_info:
        if model_info.base_model_id:
            payload["model"] = model_info.base_model_id

        # Work on a local dict rather than rebinding model_info.params.
        params = model_info.params.model_dump()

        if params:
            # Only params that are set on the model override the request;
            # unset ones must not clobber the caller's values with None
            # (which would also be sent upstream as JSON null).
            for key in (
                "temperature",
                "top_p",
                "max_tokens",
                "frequency_penalty",
                "seed",
            ):
                value = params.get(key)
                if value is not None:
                    payload[key] = value

            stop = params.get("stop")
            if stop:
                # Stop sequences are stored with literal escapes (e.g. "\\n");
                # decode them into the real characters before sending.
                payload["stop"] = [
                    bytes(sequence, "utf-8").decode("unicode_escape")
                    for sequence in stop
                ]

        system = params.get("system", None)

        if system:
            template_vars = {}
            if user:
                template_vars = {
                    "user_name": user.name,
                    "user_location": (
                        user.info.get("location") if user.info else None
                    ),
                }
            system = prompt_template(system, **template_vars)

            # Prepend to the first system message if there is one,
            # otherwise insert a new system message at the front.
            if payload.get("messages"):
                for message in payload["messages"]:
                    if message.get("role") == "system":
                        message["content"] = system + message["content"]
                        break
                else:
                    payload["messages"].insert(
                        0,
                        {
                            "role": "system",
                            "content": system,
                        },
                    )

    if url_idx is None:
        # Registry keys carry a tag, so a bare model name becomes "<name>:latest".
        if ":" not in payload["model"]:
            payload["model"] = f"{payload['model']}:latest"

        if payload["model"] not in app.state.MODELS:
            raise HTTPException(
                status_code=400,
                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
            )

        url_idx = random.choice(app.state.MODELS[payload["model"]]["urls"])

    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
    log.info(f"url: {url}")

    return await post_streaming_url(
        f"{url}/v1/chat/completions",
        json.dumps(payload),
        stream=payload.get("stream", False),
    )
993
994


995
996
997
998
999
1000
def _to_openai_model_list(models: dict) -> dict:
    """Convert an Ollama ``{"models": [...]}`` listing to OpenAI list format."""
    return {
        "data": [
            {
                "id": model["model"],
                "object": "model",
                "created": int(time.time()),
                "owned_by": "openai",
            }
            for model in models["models"]
        ],
        "object": "list",
    }


@app.get("/v1/models")
@app.get("/v1/models/{url_idx}")
async def get_openai_models(
    url_idx: Optional[int] = None,
    user=Depends(get_verified_user),
):
    """List available models in OpenAI ``/v1/models`` format.

    Args:
        url_idx: Index into ``app.state.config.OLLAMA_BASE_URLS``. When
            omitted, the merged listing from ``get_all_models()`` is used and,
            if model filtering is enabled, users with role ``"user"`` only
            see models named in ``MODEL_FILTER_LIST``. When given, that one
            backend's ``/api/tags`` is queried directly and no filter is
            applied.
        user: Injected by ``get_verified_user``; its ``role`` drives filtering.

    Returns:
        ``{"object": "list", "data": [...]}`` with one entry per model.

    Raises:
        HTTPException: When the single-backend query fails. The backend's
            error status is propagated when it sent one, otherwise 500.
    """
    if url_idx is None:
        models = await get_all_models()

        if app.state.config.ENABLE_MODEL_FILTER and user.role == "user":
            allowed = app.state.config.MODEL_FILTER_LIST
            models["models"] = [
                model for model in models["models"] if model["name"] in allowed
            ]

        return _to_openai_model_list(models)

    url = app.state.config.OLLAMA_BASE_URLS[url_idx]

    # Pre-bind so the error path can tell "no response at all" apart from
    # "got an error response" without hitting an unbound local.
    r = None
    try:
        # NOTE(review): blocking HTTP call inside an async handler.
        r = requests.request(method="GET", url=f"{url}/api/tags")
        r.raise_for_status()

        return _to_openai_model_list(r.json())

    except Exception as e:
        log.exception(e)
        error_detail = "Open WebUI: Server Connection Error"
        if r is not None:
            try:
                res = r.json()
                if "error" in res:
                    error_detail = f"Ollama: {res['error']}"
            except Exception:
                error_detail = f"Ollama: {e}"

        # A requests.Response is falsy for 4xx/5xx, so test identity rather
        # than truthiness; never report a failure with a success status.
        status_code = r.status_code if r is not None and not r.ok else 500

        raise HTTPException(
            status_code=status_code,
            detail=error_detail,
        )


Timothy J. Baek's avatar
Timothy J. Baek committed
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
# Request body of the /models/download endpoint.
class UrlForm(BaseModel):
    # Address of the file to fetch; download_model only accepts URLs that
    # start with one of its allowed hosts.
    url: str


# Form carrying a single file name. NOTE(review): not referenced by the
# endpoints visible around it — confirm whether it is still in use.
class UploadBlobForm(BaseModel):
    filename: str


def parse_huggingface_url(hf_url):
    """Return the file name at the end of a download URL's path.

    Only the path part of the URL is considered, so query strings and
    fragments are ignored. A path ending in ``/`` (or an empty path) yields
    an empty string. ``None`` is returned when the URL cannot be parsed.
    """
    try:
        url_path = urlparse(hf_url).path
        # Everything after the last "/" is the file name.
        _, _, model_file = url_path.rpartition("/")
    except ValueError:
        return None
    return model_file


async def download_file_stream(
    ollama_url, file_url, file_path, file_name, chunk_size=1024 * 1024
):
    """Download a file and register it as an Ollama blob, streaming progress.

    This is an async generator yielding server-sent-event strings
    (``data: {...}\\n\\n``): one progress event per received chunk and, once
    the blob has been created, a final event with ``done``, ``blob`` and
    ``name``.

    If ``file_path`` already exists, the download resumes from its current
    size using an HTTP ``Range`` header and new data is appended.

    Args:
        ollama_url: Base URL of the Ollama backend that receives the blob.
        file_url: URL to download from.
        file_path: Local path the download is written to; removed after the
            blob has been created successfully.
        file_name: Name reported back in the final event.
        chunk_size: Number of bytes requested per read from the response.

    Raises:
        Exception: When Ollama rejects the blob upload.
    """
    done = False

    if os.path.exists(file_path):
        current_size = os.path.getsize(file_path)
    else:
        current_size = 0

    headers = {"Range": f"bytes={current_size}-"} if current_size > 0 else {}

    timeout = aiohttp.ClientTimeout(total=600)  # Set the timeout

    async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
        async with session.get(file_url, headers=headers) as response:
            # content-length covers only the remaining bytes on a resumed download.
            total_size = int(response.headers.get("content-length", 0)) + current_size

            with open(file_path, "ab+") as file:
                async for data in response.content.iter_chunked(chunk_size):
                    current_size += len(data)
                    file.write(data)

                    done = current_size == total_size
                    # Without a content-length header the total is unknown
                    # (0); report 0% instead of dividing by zero.
                    progress = (
                        round((current_size / total_size) * 100, 2)
                        if total_size > 0
                        else 0
                    )

                    yield f'data: {{"progress": {progress}, "completed": {current_size}, "total": {total_size}}}\n\n'

                if done:
                    # Hash the complete file, then rewind again so the upload
                    # below sends it from the first byte.
                    file.seek(0)
                    hashed = calculate_sha256(file)
                    file.seek(0)

                    url = f"{ollama_url}/api/blobs/sha256:{hashed}"
                    # NOTE(review): blocking upload inside an async generator.
                    blob_response = requests.post(url, data=file)

                    if blob_response.ok:
                        res = {
                            "done": done,
                            "blob": f"sha256:{hashed}",
                            "name": file_name,
                        }
                        os.remove(file_path)

                        yield f"data: {json.dumps(res)}\n\n"
                    else:
                        # Must be an exception instance; raising a bare str
                        # is itself a TypeError.
                        raise Exception(
                            "Ollama: Could not create blob, Please try again."
                        )


# url = "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF/resolve/main/stablelm-zephyr-3b.Q2_K.gguf"
@app.post("/models/download")
@app.post("/models/download/{url_idx}")
async def download_model(
    form_data: UrlForm,
    url_idx: Optional[int] = None,
    user=Depends(get_admin_user),
):
    """Download a model file from an allowed host into an Ollama backend.

    Args:
        form_data: Body carrying the ``url`` of the file to download.
        url_idx: Index into ``app.state.config.OLLAMA_BASE_URLS``; defaults
            to the first backend.
        user: Injected by ``get_admin_user``; not otherwise used here.

    Returns:
        A ``StreamingResponse`` over ``download_file_stream``, or ``None``
        when no file name can be derived from the URL.

    Raises:
        HTTPException: 400 when the URL does not start with an allowed host.
    """
    # The trailing slash matters: it stops look-alike hosts such as
    # "https://huggingface.co.example" from passing the prefix check.
    allowed_hosts = ("https://huggingface.co/", "https://github.com/")

    if not form_data.url.startswith(allowed_hosts):
        raise HTTPException(
            status_code=400,
            detail="Invalid file_url. Only URLs from allowed hosts are permitted.",
        )

    if url_idx is None:
        url_idx = 0
    url = app.state.config.OLLAMA_BASE_URLS[url_idx]

    file_name = parse_huggingface_url(form_data.url)
    if not file_name:
        return None

    file_path = f"{UPLOAD_DIR}/{file_name}"

    return StreamingResponse(
        download_file_stream(url, form_data.url, file_path, file_name),
    )


@app.post("/models/upload")
@app.post("/models/upload/{url_idx}")
def upload_model(
    file: UploadFile = File(...),
    url_idx: Optional[int] = None,
    user=Depends(get_admin_user),
):
    """Store an uploaded model file and register it as an Ollama blob.

    The upload is first written to ``UPLOAD_DIR``. The response then streams
    server-sent events (``data: {...}\\n\\n``): one progress event per chunk
    read back from disk, followed by a final event with ``done``, ``blob``
    and ``name`` once Ollama has accepted the blob. Any failure during that
    phase is reported as an ``{"error": ...}`` event.

    Args:
        file: The uploaded file.
        url_idx: Index into ``app.state.config.OLLAMA_BASE_URLS``; defaults
            to the first backend.
        user: Injected by ``get_admin_user``; not otherwise used here.

    Returns:
        A ``text/event-stream`` ``StreamingResponse``.

    Raises:
        HTTPException: 400 when the upload has no usable file name.
    """
    if url_idx is None:
        url_idx = 0
    ollama_url = app.state.config.OLLAMA_BASE_URLS[url_idx]

    # The file name comes from the client: keep only its last path component
    # so something like "../../x" cannot escape UPLOAD_DIR.
    safe_name = os.path.basename(file.filename or "")
    if safe_name in ("", ".", ".."):
        raise HTTPException(status_code=400, detail="Invalid file name.")

    file_path = f"{UPLOAD_DIR}/{safe_name}"

    # Save file in chunks
    with open(file_path, "wb+") as f:
        for chunk in file.file:
            f.write(chunk)

    def file_process_stream():
        total_size = os.path.getsize(file_path)
        chunk_size = 1024 * 1024
        try:
            with open(file_path, "rb") as f:
                total = 0

                # Read until f.read() returns b"" (end of file).
                for chunk in iter(lambda: f.read(chunk_size), b""):
                    total += len(chunk)
                    progress = round((total / total_size) * 100, 2)

                    res = {
                        "progress": progress,
                        "total": total_size,
                        "completed": total,
                    }
                    yield f"data: {json.dumps(res)}\n\n"

                # Hash the complete file, then rewind again so the upload
                # below sends it from the first byte.
                f.seek(0)
                hashed = calculate_sha256(f)
                f.seek(0)

                url = f"{ollama_url}/api/blobs/sha256:{hashed}"
                response = requests.post(url, data=f)

                if response.ok:
                    res = {
                        "done": True,
                        "blob": f"sha256:{hashed}",
                        "name": file.filename,
                    }
                    os.remove(file_path)
                    yield f"data: {json.dumps(res)}\n\n"
                else:
                    raise Exception(
                        "Ollama: Could not create blob, Please try again."
                    )

        except Exception as e:
            # Surface the failure to the client as an event instead of
            # breaking the stream.
            res = {"error": str(e)}
            yield f"data: {json.dumps(res)}\n\n"

    return StreamingResponse(file_process_stream(), media_type="text/event-stream")