# google_pse.py: web search through Google's Programmable Search Engine API.
import json
import logging
from typing import List

import requests

from apps.rag.search.main import SearchResult, filter_by_whitelist
from config import SRC_LOG_LEVELS

# Module-level logger named after this module; its level is taken from the
# "RAG" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])


def search_google_pse(
    api_key: str, search_engine_id: str, query: str, count: int, whitelist: List[str]
) -> list[SearchResult]:
    """Search using Google's Programmable Search Engine API and return the results as a list of SearchResult objects.

    Args:
        api_key (str): A Programmable Search Engine API key
        search_engine_id (str): A Programmable Search Engine ID
        query (str): The query to search for
        count (int): Number of results to request; sent as the API's "num"
            parameter. NOTE(review): Google documents "num" as limited to
            1-10 -- confirm callers stay within that range.
        whitelist (List[str]): Passed to filter_by_whitelist together with the
            raw result items (presumably allowed domains -- confirm against
            that helper).

    Returns:
        list[SearchResult]: One entry per item returned by
        filter_by_whitelist, carrying the item's link, title and snippet.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    url = "https://www.googleapis.com/customsearch/v1"

    headers = {"Content-Type": "application/json"}
    params = {
        "cx": search_engine_id,
        "q": query,
        "key": api_key,
        "num": count,
    }

    # Bound the wait: without a timeout, requests blocks indefinitely when the
    # endpoint stops responding.
    response = requests.request(
        "GET", url, headers=headers, params=params, timeout=30
    )
    response.raise_for_status()

    json_response = response.json()
    # "items" is read with a default so a response without that key yields an
    # empty result list instead of a KeyError.
    results = json_response.get("items", [])
    filtered_results = filter_by_whitelist(results, whitelist)
    return [
        SearchResult(
            # "link" is required; title and snippet fall back to None.
            link=result["link"],
            title=result.get("title"),
            snippet=result.get("snippet"),
        )
        for result in filtered_results
    ]