Commit 02f63cdc authored by mashun1

inference

Pipeline #1416 canceled with stages
import os

import pkg_resources
from setuptools import setup, find_packages

setup(
    name="clip",
    py_modules=["clip"],
    version="1.0",
    description="",
    author="OpenAI",
    packages=find_packages(exclude=["tests*"]),
    install_requires=[
        str(r)
        for r in pkg_resources.parse_requirements(
            open(os.path.join(os.path.dirname(__file__), "requirements.txt"))
        )
    ],
    include_package_data=True,
    extras_require={'dev': ['pytest']},
)
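# Usage sketch (an addition for illustration, not part of the commit): with a
# requirements.txt next to this setup.py, the package can be installed in
# editable mode, pulling in pytest via the "dev" extra:
#
#     pip install -e ".[dev]"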
import argparse
import os

import numpy as np
import torch
from sklearn.linear_model import LogisticRegression
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR100
from tqdm import tqdm

import clip


def get_features(dataset):
    """Encode every image in the dataset with the frozen CLIP image encoder."""
    all_features = []
    all_labels = []

    with torch.no_grad():
        for images, labels in tqdm(DataLoader(dataset, batch_size=100)):
            features = model.encode_image(images.to(device))

            all_features.append(features)
            all_labels.append(labels)

    return torch.cat(all_features).cpu().numpy(), torch.cat(all_labels).cpu().numpy()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--pt", type=str, help="model name or .pt checkpoint filename")
    args = parser.parse_args()

    device = "cuda" if torch.cuda.is_available() else "cpu"
    # A ".pt" argument is loaded as a local checkpoint under pretrained_models/;
    # anything else is passed to clip.load() as a model name (e.g. "ViT-B/32").
    if ".pt" in args.pt:
        model, preprocess = clip.load(f"pretrained_models/{args.pt}", device=device)
    else:
        model, preprocess = clip.load(args.pt, device=device)

    # Load the CIFAR-100 train and test splits (downloaded to ~/.cache on first use)
    root = os.path.expanduser("~/.cache")
    train = CIFAR100(root, download=True, train=True, transform=preprocess)
    test = CIFAR100(root, download=True, train=False, transform=preprocess)

    # Calculate the image features
    train_features, train_labels = get_features(train)
    test_features, test_labels = get_features(test)

    # Perform logistic regression on the frozen features (linear probe)
    classifier = LogisticRegression(random_state=0, C=0.316, max_iter=1000, verbose=1)
    classifier.fit(train_features, train_labels)

    # Evaluate using the logistic regression classifier
    predictions = classifier.predict(test_features)
    accuracy = np.mean((test_labels == predictions).astype(float)) * 100.
    print(f"Accuracy = {accuracy:.3f}")
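# Sketch (an addition for illustration, not part of the commit): the regularisation
# strength C above is fixed at 0.316; it can instead be picked by a small sweep on a
# validation split carved out of the training features, keeping the test set untouched.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split


def sweep_C(train_features, train_labels, candidates=(0.01, 0.1, 0.316, 1.0, 10.0)):
    """Return the candidate C with the best accuracy on a held-out validation split."""
    tr_x, va_x, tr_y, va_y = train_test_split(
        train_features, train_labels, test_size=0.1, random_state=0
    )
    best_c, best_acc = None, -1.0
    for c in candidates:
        clf = LogisticRegression(random_state=0, C=c, max_iter=1000)
        clf.fit(tr_x, tr_y)
        acc = clf.score(va_x, va_y)
        if acc > best_acc:
            best_c, best_acc = c, acc
    return best_c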
import argparse

import torch
from PIL import Image

import clip

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--pt", type=str, help="model name or .pt checkpoint filename")
    args = parser.parse_args()

    device = "cuda" if torch.cuda.is_available() else "cpu"
    # A ".pt" argument is loaded as a local checkpoint under pretrained_models/;
    # anything else is passed to clip.load() as a model name (e.g. "ViT-B/32").
    if ".pt" in args.pt:
        model, preprocess = clip.load(f"pretrained_models/{args.pt}", device=device)
    else:
        model, preprocess = clip.load(args.pt, device=device)

    image = preprocess(Image.open("CLIP.png")).unsqueeze(0).to(device)
    text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device)

    with torch.no_grad():
        image_features = model.encode_image(image)
        text_features = model.encode_text(text)

        logits_per_image, logits_per_text = model(image, text)
        probs = logits_per_image.softmax(dim=-1).cpu().numpy()

    print(probs)
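# Sketch (an addition for illustration, not part of the commit): the probabilities
# printed above can also be derived directly from the encoded features by taking the
# cosine similarity between L2-normalised image and text embeddings; the 100.0 factor
# stands in for CLIP's learned temperature (logit scale), so values may differ slightly.
with torch.no_grad():
    image_norm = image_features / image_features.norm(dim=-1, keepdim=True)
    text_norm = text_features / text_features.norm(dim=-1, keepdim=True)
    similarity = (100.0 * image_norm @ text_norm.T).softmax(dim=-1)
print(similarity.cpu().numpy())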
import numpy as np
import pytest
import torch
from PIL import Image

import clip


@pytest.mark.parametrize('model_name', clip.available_models())
def test_consistency(model_name):
    """The JIT-compiled and pure-Python models should produce matching probabilities."""
    device = "cpu"
    jit_model, transform = clip.load(model_name, device=device, jit=True)
    py_model, _ = clip.load(model_name, device=device, jit=False)

    image = transform(Image.open("CLIP.png")).unsqueeze(0).to(device)
    text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device)

    with torch.no_grad():
        logits_per_image, _ = jit_model(image, text)
        jit_probs = logits_per_image.softmax(dim=-1).cpu().numpy()

        logits_per_image, _ = py_model(image, text)
        py_probs = logits_per_image.softmax(dim=-1).cpu().numpy()

    assert np.allclose(jit_probs, py_probs, atol=0.01, rtol=0.1)
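# Usage note (an assumption, not part of the commit): run the test with pytest from
# the repository root, e.g. `pytest -q`; it parametrises over every model returned by
# clip.available_models(), so each model is downloaded on first use.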