model.py 3.91 KB
Newer Older
Bruce MacDonald's avatar
Bruce MacDonald committed
1
import requests
Bruce MacDonald's avatar
Bruce MacDonald committed
2
import validators
Bruce MacDonald's avatar
Bruce MacDonald committed
3
from pathlib import Path
4
from os import path, walk
Bruce MacDonald's avatar
Bruce MacDonald committed
5
6
from urllib.parse import urlsplit, urlunsplit
from tqdm import tqdm
Michael Yang's avatar
Michael Yang committed
7
8


Michael Yang's avatar
Michael Yang committed
9
10
# URL of the curated model directory. get_url_from_directory() fetches it and
# reads the response as a JSON list of entries with 'name' and 'url' keys.
MODELS_MANIFEST = 'https://ollama.ai/api/models'
# Local directory that downloaded model files (*.bin) are written to by
# download_file() and listed from by models().
MODELS_CACHE_PATH = path.join(Path.home(), '.ollama', 'models')
11
12


Bruce MacDonald's avatar
Bruce MacDonald committed
13
def models(*args, **kwargs):
    """Yield the name of every model file found in the local cache.

    Walks ``MODELS_CACHE_PATH`` recursively and yields the base name (the
    file name without its extension) of each file whose extension is
    exactly ``.bin``. Nothing is yielded when the directory does not exist,
    because ``os.walk`` ignores listing errors by default.

    ``*args`` and ``**kwargs`` are accepted but ignored; ``pull`` passes the
    cache path positionally, and the walk always uses ``MODELS_CACHE_PATH``.
    """
    for _, _, files in walk(MODELS_CACHE_PATH):
        for file in files:
            base, ext = path.splitext(file)
            if ext == '.bin':
                yield base
Bruce MacDonald's avatar
Bruce MacDonald committed
19
20


Bruce MacDonald's avatar
Bruce MacDonald committed
21
22
# get the url of the model from our curated directory
def get_url_from_directory(model):
    """Look up *model* in the curated directory and return its URL.

    Fetches the JSON manifest at ``MODELS_MANIFEST`` and scans it for an
    entry whose ``name`` equals *model*.

    Returns:
        The ``url`` value of the first matching entry, or *model* unchanged
        when no entry matches (so the caller can treat the argument as a
        URL in its own right).

    Raises:
        requests.HTTPError: the manifest request returned a 4xx/5xx status.
        requests.Timeout: the server did not respond in time.
    """
    # without a timeout an unresponsive server would block the caller forever
    response = requests.get(MODELS_MANIFEST, timeout=30)
    response.raise_for_status()
    directory = response.json()
    for model_info in directory:
        if model_info.get('name') == model:
            return model_info.get('url')
    return model


Bruce MacDonald's avatar
Bruce MacDonald committed
32
def download_from_repo(url, file_name):
    """Download the ``.bin`` model file of the repository at *url*.

    The path of *url* is read as ``<location>[/tree/<branch>]``; the branch
    defaults to ``main``. The repository's file listing is requested from
    ``/api/models/<location>/tree/<branch>`` on the same host (always over
    https), ``find_bin_file`` picks the file to fetch and ``download_file``
    stores it.

    Args:
        url: repository URL, optionally pointing at a specific branch.
        file_name: name to store the model under; when empty, the
            lower-cased last component of the repository path is used.

    Returns:
        Whatever ``download_file`` returns (the local file path).

    Raises:
        requests.HTTPError: the listing request returned a 4xx/5xx status.
        Exception: ``find_bin_file`` found no ``.bin`` file in the listing.
    """
    parts = urlsplit(url)

    # split on the first '/tree/' only, so a later '/tree/' inside the
    # remainder cannot break the unpacking
    location, _, branch = parts.path.partition('/tree/')
    location = location.strip('/')
    # a trailing slash (".../tree/main/") must not leak into the built URLs
    branch = branch.strip('/') or 'main'

    if not file_name:
        file_name = path.basename(location).lower()

    download_url = urlunsplit(
        (
            'https',
            parts.netloc,
            f'/api/models/{location}/tree/{branch}',
            parts.query,
            parts.fragment,
        )
    )
    # without a timeout an unresponsive server would block the caller forever
    response = requests.get(download_url, timeout=30)
    response.raise_for_status()
    json_response = response.json()

    download_url, file_size = find_bin_file(json_response, location, branch)
    return download_file(download_url, file_name, file_size)
Bruce MacDonald's avatar
Bruce MacDonald committed
61
62
63


def find_bin_file(json_response, location, branch):
    """Pick the ``.bin`` file out of a repository file listing.

    Args:
        json_response: iterable of dict entries; an entry is considered when
            its ``type`` is ``'file'`` and its ``path`` ends with ``.bin``.
        location: repository path used to build the download URL.
        branch: branch name used to build the download URL.

    Returns:
        A ``(download_url, file_size)`` tuple, where ``download_url`` is
        ``https://huggingface.co/<location>/resolve/<branch>/<path>`` and
        ``file_size`` is the entry's ``size`` value (``None`` if the entry
        has none). When several entries qualify, the last one wins.

    Raises:
        Exception: no entry qualifies.
    """
    download_url = None
    file_size = 0
    for file_info in json_response:
        if file_info.get('type') != 'file':
            continue
        f_path = file_info.get('path')
        # an entry without a usable path cannot be a model file; skipping it
        # avoids calling .endswith on None
        if not isinstance(f_path, str) or not f_path.endswith('.bin'):
            continue
        download_url = (
            f'https://huggingface.co/{location}/resolve/{branch}/{f_path}'
        )
        file_size = file_info.get('size')

    if download_url is None:
        raise Exception('No model found')

    return download_url, file_size

Bruce MacDonald's avatar
Bruce MacDonald committed
79

Bruce MacDonald's avatar
Bruce MacDonald committed
80
def download_file(download_url, file_name, file_size):
    """Download *download_url* into the model cache, resuming if possible.

    The file is stored as ``<MODELS_CACHE_PATH>/<file_name>.bin``. If a file
    of at least *file_size* bytes is already there, nothing is downloaded.
    If a shorter file exists, the remaining bytes are requested with an HTTP
    ``Range`` header and appended. A progress bar is shown while data is
    written.

    Args:
        download_url: URL of the file to fetch.
        file_name: model name; ``.bin`` is appended for the local file.
        file_size: expected size in bytes; a falsy value (``None`` or ``0``)
            means the size is unknown and the download is always attempted.

    Returns:
        The path of the local file.

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
        requests.Timeout: the server stopped responding.
    """
    local_filename = path.join(MODELS_CACHE_PATH, file_name) + '.bin'

    first_byte = path.getsize(local_filename) if path.exists(local_filename) else 0

    # only trust the comparison when the expected size is actually known;
    # an unknown size must not be mistaken for "already complete"
    if file_size and first_byte >= file_size:
        return local_filename

    print(f'Pulling {file_name}...')

    # the cache directory does not exist before the first download
    Path(local_filename).parent.mkdir(parents=True, exist_ok=True)

    header = {'Range': f'bytes={first_byte}-'} if first_byte != 0 else {}

    # the context manager releases the streamed connection even on errors
    with requests.get(
        download_url, headers=header, stream=True, timeout=30
    ) as response:
        response.raise_for_status()

        if first_byte and response.status_code != 206:
            # the server ignored the Range header and is sending the whole
            # file; appending would corrupt it, so start over from byte 0
            first_byte = 0

        total_size = int(response.headers.get('content-length', 0)) + first_byte

        with open(local_filename, 'ab' if first_byte else 'wb') as file, tqdm(
            total=total_size,
            unit='iB',
            unit_scale=True,
            unit_divisor=1024,
            initial=first_byte,
            ascii=' ==',
            bar_format='Downloading [{bar}] {percentage:3.2f}% {rate_fmt}{postfix}',
        ) as bar:
            # model files are large; bigger chunks avoid millions of tiny writes
            for data in response.iter_content(chunk_size=64 * 1024):
                size = file.write(data)
                bar.update(size)

    return local_filename
Bruce MacDonald's avatar
Bruce MacDonald committed
111
112


Bruce MacDonald's avatar
Bruce MacDonald committed
113
def pull(model, *args, **kwargs):
    """Resolve *model* to something loadable, downloading it if necessary.

    *model* may be a path on the local filesystem, a URL (with or without
    scheme; ``huggingface.co/...`` is recognised without one), or a name
    listed in the curated directory.

    Returns:
        * *model* itself when it is an existing filesystem path;
        * *model* itself when it cannot be resolved to a valid URL but a
          model with that name is already in the local cache;
        * otherwise the local file path returned by ``download_from_repo``.

    Raises:
        Exception: *model* is neither a valid URL nor a cached model name.

    ``*args`` and ``**kwargs`` are accepted but ignored.
    """
    if path.exists(model):
        # a file on the filesystem is being specified
        return model

    # check the remote model location and see if it needs to be downloaded
    url = model
    file_name = ''
    if not validators.url(url) and not url.startswith('huggingface.co'):
        # a directory entry without a 'url' yields None; fall back to the
        # name so the checks below report it as unknown instead of crashing
        url = get_url_from_directory(model) or model
        file_name = model

    if not url.startswith(('http://', 'https://')):
        url = f'https://{url}'

    if not validators.url(url):
        if model in models(MODELS_CACHE_PATH):
            # the model is already downloaded, and specified by name
            return model
        raise Exception(f'Unknown model {model}')

    return download_from_repo(url, file_name)