"""Client: run evaluation example.

This example demonstrates how to use the dbgpt client to run evaluations in the
RAG recall scene and the app answer scene.

Example:
    .. code-block:: python

        DBGPT_API_KEY = "dbgpt"
        client = Client(api_key=DBGPT_API_KEY)

        # 1. evaluate with rag recall
        request = EvaluateServeRequest(
            # The scene type of the evaluation; supported values: "app", "recall"
            scene_key="recall",
            # The scene id: a space id when scene_key is "recall", an app id
            # when scene_key is "app"
            scene_value="147",
            context={"top_k": 5},
            evaluate_metrics=[
                "RetrieverHitRateMetric",
                "RetrieverMRRMetric",
                "RetrieverSimilarityMetric",
            ],
            datasets=[
                {
                    "query": "what awel talked about",
                    "doc_name": "awel.md",
                }
            ],
        )
        # 2. evaluate with app answer
        request = EvaluateServeRequest(
            # The scene type of the evaluation; supported values: "app", "recall"
            scene_key="app",
            # The scene id: an app id when scene_key is "app", a space id
            # when scene_key is "recall"
            scene_value="2c76eea2-83b6-11ef-b482-acde48001122",
            "context"={
                "top_k": 5,
                "prompt": "942acd7e33b54ce28565f89f9b278044",
                "model": "zhipu_proxyllm",
            },
            evaluate_metrics=[
                "AnswerRelevancyMetric",
            ],
            datasets=[
                {
                    "query": "what awel talked about",
                    "doc_name": "awel.md",
                }
            ],
        )
        data = await run_evaluation(client, request=request)
        print(data)
"""

import asyncio

from dbgpt_client import Client
from dbgpt_client.evaluation import run_evaluation
from dbgpt_serve.evaluate.api.schemas import EvaluateServeRequest


async def main():
    # initialize client
    DBGPT_API_KEY = "dbgpt"
    SPACE_ID = "147"
    client = Client(api_key=DBGPT_API_KEY)
    request = EvaluateServeRequest(
        # The scene type of the evaluation; supported values: "app", "recall"
        scene_key="recall",
        # The scene id: a space id when scene_key is "recall", an app id
        # when scene_key is "app"
        scene_value=SPACE_ID,
        context={"top_k": 5},
        evaluate_metrics=[
            "RetrieverHitRateMetric",
            "RetrieverMRRMetric",
            "RetrieverSimilarityMetric",
        ],
        datasets=[
            {
                "query": "what awel talked about",
                "doc_name": "awel.md",
            }
        ],
    )
    data = await run_evaluation(client, request=request)
    print(data)
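
    # A minimal sketch of the second scene from the module docstring: evaluating
    # an app's answers. The app id, prompt id, and model name below are
    # placeholder values copied from the docstring example; replace them with
    # ids from your own deployment before running this part.
    APP_ID = "2c76eea2-83b6-11ef-b482-acde48001122"
    app_request = EvaluateServeRequest(
        scene_key="app",
        scene_value=APP_ID,
        context={
            "top_k": 5,
            # Prompt id and proxy model name, as in the docstring example
            "prompt": "942acd7e33b54ce28565f89f9b278044",
            "model": "zhipu_proxyllm",
        },
        evaluate_metrics=[
            "AnswerRelevancyMetric",
        ],
        datasets=[
            {
                "query": "what awel talked about",
                "doc_name": "awel.md",
            }
        ],
    )
    app_data = await run_evaluation(client, request=app_request)
    print(app_data)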


if __name__ == "__main__":
    asyncio.run(main())