# openai_util.py

import requests
import base64
from io import BytesIO
from PIL import Image, ImageOps
from typing import Union, Optional, Tuple, List
import os

def encode_pil_image(pil_image):
    """Return *pil_image* encoded as a base64 string of JPEG bytes.

    Args:
        pil_image: A PIL image (any object exposing ``mode``, ``convert``
            and ``save`` like ``PIL.Image.Image``).

    Returns:
        str: The JPEG-encoded image data, base64-encoded and decoded as UTF-8.
    """
    # JPEG has no alpha channel or palette support, so saving e.g. an RGBA or
    # P image raises. Convert only the modes JPEG cannot store, leaving images
    # that already saved correctly untouched.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if pil_image.mode not in jpeg_modes:
        pil_image = pil_image.convert("RGB")

    # Encode into an in-memory buffer rather than touching the filesystem.
    buffer = BytesIO()
    pil_image.save(buffer, format="JPEG")

    return base64.b64encode(buffer.getvalue()).decode("utf-8")

def load_image(
    image: Union[str, Image.Image],
    format: str = "RGB",
    size: Optional[Tuple] = None,
    timeout: float = 30.0,
) -> Image.Image:
    """
    Load an image from a given path or URL and convert it to a PIL Image.

    Args:
        image (Union[str, Image.Image]): The image path, URL, or a PIL Image object to be loaded.
        format (str, optional): Desired color format of the resulting image. Defaults to "RGB".
        size (Optional[Tuple], optional): Desired size for resizing the image. Defaults to None.
        timeout (float, optional): Seconds to wait for the server when `image` is a URL.
            Defaults to 30.0.

    Returns:
        Image.Image: A PIL Image in the specified format and size.

    Raises:
        ValueError: If `image` is a string that is neither an http(s) URL nor an
            existing file path, or if it is neither a string nor a PIL Image.
        requests.RequestException: If downloading the URL fails, times out, or
            returns an HTTP error status.
    """
    if isinstance(image, str):
        if image.startswith(("http://", "https://")):
            # Always pass a timeout so an unresponsive host cannot hang the caller,
            # and fail on HTTP errors instead of trying to decode an error page.
            response = requests.get(image, timeout=timeout)
            response.raise_for_status()
            # `.content` is fully downloaded and content-decoded (gzip/deflate),
            # unlike the raw socket stream, and releases the connection.
            image = Image.open(BytesIO(response.content))
        elif os.path.isfile(image):
            image = Image.open(image)
        else:
            raise ValueError(
                f"Incorrect path or url, URLs must start with `http://` or `https://`, and {image} is not a valid path"
            )
    elif not isinstance(image, Image.Image):
        raise ValueError(
            "Incorrect format used for image. Should be an url linking to an image, a local path, or a PIL image."
        )

    # Apply the EXIF orientation tag so the pixels match how the image is displayed.
    image = ImageOps.exif_transpose(image)
    image = image.convert(format)
    if size is not None:
        image = image.resize(size, Image.LANCZOS)
    return image

def prepare_prompt(image_links: Optional[List] = None, text_prompt: str = ""):
    """Build the ``content`` list of a chat message from text and images.

    Args:
        image_links: Images to attach. May be a list or tuple of items accepted
            by ``load_image``, a single such item, or ``None`` / empty for a
            text-only prompt.
        text_prompt: The text part of the prompt.

    Returns:
        list: A new list whose first element is the text part
        (``{"type": "text", ...}``) followed by one ``{"type": "image_url", ...}``
        entry per image, each carrying the image as a base64 JPEG data URL.
    """
    # Use None instead of a mutable [] default, and normalise a single item
    # into a one-element list.
    if image_links is None:
        image_links = []
    elif not isinstance(image_links, (list, tuple)):
        image_links = [image_links]

    prompt_content = [{"type": "text", "text": text_prompt}]
    for image_link in image_links:
        image = load_image(image_link)
        prompt_content.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{encode_pil_image(image)}"},
            }
        )
    return prompt_content

def ask_gpt4o(image_path, prompt, url, api_key, model="gpt-4.1", max_tokens=1400, timeout=180):
    """Send a text + image prompt to a chat-completions endpoint and return the reply.

    Args:
        image_path: Image or images to attach; passed through to ``prepare_prompt``.
        prompt: The text prompt.
        url: Full URL of the chat-completions endpoint to POST to.
        api_key: API key sent as a ``Bearer`` token in the ``Authorization`` header.
        model: Model name placed in the request payload. Defaults to "gpt-4.1".
        max_tokens: Value of ``max_tokens`` in the request payload. Defaults to 1400.
        timeout: Request timeout in seconds. Defaults to 180 (3 minutes).

    Returns:
        The value returned by ``extract_response`` for the HTTP response, or an
        empty string if the request itself failed.
    """
    content = prepare_prompt(image_path, prompt)
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": content}],
        "max_tokens": max_tokens,
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    try:
        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    except Exception as e:
        # Best-effort call: report any transport failure and return an empty
        # answer rather than propagating it to the caller.
        print(f"Error: {e}")
        return ""
    return extract_response(response)

def extract_response(response):
    """Extract the assistant's message from a chat-completions HTTP response.

    Args:
        response: A response object exposing ``.json()`` (e.g. ``requests.Response``).

    Returns:
        The ``content`` of the first choice's message on success;
        the string ``"rate_limit_exceeded"`` when the API reports a rate-limit
        or quota error; an empty string for any other failure (including a
        body that is not JSON or has an unexpected shape).
    """
    try:
        data = response.json()
    except ValueError:
        # A non-JSON body (e.g. an HTML error page from a proxy) raises a
        # ValueError subclass; there is no error code to inspect.
        print("Response body is not valid JSON")
        print(getattr(response, "text", response))
        return ""

    try:
        return data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        # Not a successful completion; fall through to error reporting.
        pass

    # Read the error defensively: the payload may lack "error" entirely or
    # carry it in an unexpected shape.
    error = data.get("error") if isinstance(data, dict) else None
    code = error.get("code") if isinstance(error, dict) else None

    if code == "content_policy_violation":
        print("Code is content_policy_violation")
    elif code in ("rate_limit_exceeded", "insufficient_quota", "insufficient_user_quota"):
        print(f"Code is {code}", flush=True)
        print(error.get("message"), flush=True)
        return "rate_limit_exceeded"
    else:
        print("Code is different")
        print(data)
        print(f"response['error']['code']={code!r}")
    return ""