Commit 6ce9a58c authored by Michael Yang's avatar Michael Yang
Browse files

upgrade fuzzy search library

use process.extract to produce a list of fuzzy matches instead of
process.extractOne
parent 27ead750
import os
import sys
from os import path
from pathlib import Path
from contextlib import contextmanager
from fuzzywuzzy import process
from thefuzz import process
from llama_cpp import Llama
from ctransformers import AutoModelForCausalLM
......@@ -39,16 +38,14 @@ def load(model_name, models={}):
for model_type in cls.model_types()
}
while len(runners) > 0:
for match, _ in process.extract(model_path, runners.keys(), limit=len(runners)):
try:
best_match, _ = process.extractOne(model_path, runners.keys())
model = runners.get(best_match, LlamaCppRunner)
runner = model(model_path, best_match)
model = runners.get(match)
runner = model(model_path, match)
models.update({model_name: runner})
return models.get(model_name)
return runner
except Exception:
# try the next runner
runners.pop(best_match)
pass
raise Exception("failed to load model", model_path, model_name)
......
......@@ -443,23 +443,6 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
tqdm = ["tqdm"]
[[package]]
name = "fuzzywuzzy"
version = "0.18.0"
description = "Fuzzy string matching in python"
optional = false
python-versions = "*"
files = [
{file = "fuzzywuzzy-0.18.0-py2.py3-none-any.whl", hash = "sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993"},
{file = "fuzzywuzzy-0.18.0.tar.gz", hash = "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8"},
]
[package.dependencies]
python-levenshtein = {version = ">=0.12", optional = true, markers = "extra == \"speedup\""}
[package.extras]
speedup = ["python-levenshtein (>=0.12)"]
[[package]]
name = "huggingface-hub"
version = "0.15.1"
......@@ -1043,6 +1026,23 @@ files = [
[package.extras]
tests = ["pytest", "pytest-cov"]
[[package]]
name = "thefuzz"
version = "0.19.0"
description = "Fuzzy string matching in python"
optional = false
python-versions = "*"
files = [
{file = "thefuzz-0.19.0-py2.py3-none-any.whl", hash = "sha256:4fcdde8e40f5ca5e8106bc7665181f9598a9c8b18b0a4d38c41a095ba6788972"},
{file = "thefuzz-0.19.0.tar.gz", hash = "sha256:6f7126db2f2c8a54212b05e3a740e45f4291c497d75d20751728f635bb74aa3d"},
]
[package.dependencies]
python-levenshtein = {version = ">=0.12", optional = true, markers = "extra == \"speedup\""}
[package.extras]
speedup = ["python-levenshtein (>=0.12)"]
[[package]]
name = "tqdm"
version = "4.65.0"
......@@ -1211,4 +1211,4 @@ termcolor = ">=2.2,<3.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.8"
content-hash = "bd4b373e3903bd26b983163f3cc527a6f768f4280201fcbca4d4dc05dea66912"
content-hash = "9e9c14aae817d7863b4facda840e246f98b2d6b2517085b22eb5c5d919ae7784"
......@@ -18,7 +18,7 @@ validators = "^0.20.0"
yaspin = "^2.3.0"
llama-cpp-python = "^0.1.67"
ctransformers = "^0.2.10"
fuzzywuzzy = {extras = ["speedup"], version = "^0.18.0"}
thefuzz = {version = "^0.19.0", extras = ["speedup"]}
[build-system]
requires = ["poetry-core"]
......
......@@ -270,9 +270,6 @@ frozenlist==1.3.3 ; python_version >= "3.8" and python_version < "4.0" \
fsspec==2023.6.0 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:1cbad1faef3e391fba6dc005ae9b5bdcbf43005c9167ce78c915549c352c869a \
--hash=sha256:d0b2f935446169753e7a5c5c55681c54ea91996cc67be93c39a154fb3a2742af
fuzzywuzzy[speedup]==0.18.0 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8 \
--hash=sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993
huggingface-hub==0.15.1 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:05b0fb0abbf1f625dfee864648ac3049fe225ac4371c7bafaca0c2d3a2f83445 \
--hash=sha256:a61b7d1a7769fe10119e730277c72ab99d95c48d86a3d6da3e9f3d0f632a4081
......@@ -688,6 +685,9 @@ requests==2.31.0 ; python_version >= "3.8" and python_version < "4.0" \
termcolor==2.3.0 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:3afb05607b89aed0ffe25202399ee0867ad4d3cb4180d98aaf8eefa6a5f7d475 \
--hash=sha256:b5b08f68937f138fe92f6c089b99f1e2da0ae56c52b78bf7075fd95420fd9a5a
thefuzz[speedup]==0.19.0 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:4fcdde8e40f5ca5e8106bc7665181f9598a9c8b18b0a4d38c41a095ba6788972 \
--hash=sha256:6f7126db2f2c8a54212b05e3a740e45f4291c497d75d20751728f635bb74aa3d
tqdm==4.65.0 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5 \
--hash=sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment