generate_qa_json_schema.py 2.02 KB
Newer Older
zzg_666's avatar
zzg_666 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from dataflow.operators.core_text import PromptedGenerator
from dataflow.serving import APILLMServing_request
from dataflow.utils.storage import FileStorage

class GPT_generator:
    """One-step pipeline that prompts an LLM to solve math problems as JSON.

    The constructor wires together three DataFlow components:

    * ``self.storage`` -- a ``FileStorage`` configured with the input JSONL
      path, a cache directory, a file-name prefix and the ``jsonl`` cache type.
    * ``self.llm_serving`` -- an ``APILLMServing_request`` configured with a
      chat-completions URL, a model name and a worker count.
    * ``self.prompt_generator`` -- a ``PromptedGenerator`` given the serving
      object, a system prompt and a JSON schema with the string fields
      ``problem``, ``solution`` and ``answer``.

    All constructor arguments are keyword-only and default to the values the
    script originally hard-coded, so ``GPT_generator()`` behaves as before.
    """

    # Defaults preserved from the original hard-coded configuration.
    DEFAULT_INPUT_FILE = "../../dataflow/example/GeneralTextPipeline/math_100.jsonl"
    DEFAULT_API_URL = "http://123.129.219.111:3000/v1/chat/completions"
    DEFAULT_MODEL_NAME = "gpt-4o"
    DEFAULT_MAX_WORKERS = 50
    DEFAULT_SYSTEM_PROMPT = "Please solve this math problem. Answer in JSON format."

    def __init__(
        self,
        *,
        first_entry_file_name: str = DEFAULT_INPUT_FILE,
        api_url: str = DEFAULT_API_URL,
        model_name: str = DEFAULT_MODEL_NAME,
        max_workers: int = DEFAULT_MAX_WORKERS,
        system_prompt: str = DEFAULT_SYSTEM_PROMPT,
    ) -> None:
        """Build the storage, the LLM serving client and the generator operator.

        Args:
            first_entry_file_name: Path passed to ``FileStorage`` as the first
                entry file. The default is relative, so it is presumably
                resolved against the current working directory -- confirm
                against ``FileStorage``.
            api_url: Chat-completions endpoint passed to
                ``APILLMServing_request``.
            model_name: Model identifier passed to ``APILLMServing_request``.
            max_workers: Worker count passed to ``APILLMServing_request``.
            system_prompt: System prompt passed to ``PromptedGenerator``.
        """
        self.storage = FileStorage(
            first_entry_file_name=first_entry_file_name,
            cache_path="./cache",
            file_name_prefix="math_QA",
            cache_type="jsonl",
        )
        # Not read anywhere in this class; kept because external code may
        # rely on the attribute being present.
        self.model_cache_dir = './dataflow_cache'
        self.llm_serving = APILLMServing_request(
            api_url=api_url,
            model_name=model_name,
            max_workers=max_workers,
        )
        self.prompt_generator = PromptedGenerator(
            llm_serving=self.llm_serving,
            system_prompt=system_prompt,
            # NOTE(review): presumably used by the operator to constrain the
            # model's reply to this structure -- confirm in PromptedGenerator.
            json_schema=self._qa_json_schema(),
        )

    @staticmethod
    def _qa_json_schema() -> dict:
        """Return a fresh JSON schema describing one problem/solution/answer record.

        A new dict is built on every call so that callers may mutate the
        result without affecting other instances.
        """
        return {
            "type": "object",
            "properties": {
                "problem": {
                    "type": "string",
                    "description": "The math problem that needs to be solved."
                },
                "solution": {
                    "type": "string",
                    "description": "Step-by-step reasoning and solution process."
                },
                "answer": {
                    "type": "string",
                    "description": "The final answer to the math problem."
                }
            },
            "required": ["problem", "solution", "answer"],
            # Reject any keys beyond the three declared above.
            "additionalProperties": False
        }

    def forward(self, input_key: str = "problem") -> None:
        """Run the prompted-generation step once.

        Args:
            input_key: Name passed to the operator as ``input_key``; defaults
                to ``"problem"``, the value the script originally used.
        """
        # ``storage.step()`` is evaluated first and its result is what the
        # operator receives as its storage argument.
        self.prompt_generator.run(
            storage=self.storage.step(),
            input_key=input_key,
        )


if __name__ == "__main__":
    # Script entry point: construct the pipeline with its default
    # configuration, then execute its single generation step.
    pipeline = GPT_generator()
    pipeline.forward()