# generate_qa_json_schema.py

from dataflow.operators.core_text import PromptedGenerator
from dataflow.serving import APILLMServing_request
from dataflow.utils.storage import FileStorage

class GPT_generator:
    """Single-step pipeline that prompts a chat model to solve math problems.

    The constructor wires together three collaborators: a ``FileStorage``
    holding the input records, an ``APILLMServing_request`` client for the
    chat-completions endpoint, and a ``PromptedGenerator`` that is given a
    system prompt plus a JSON schema with the required string fields
    ``problem``, ``solution`` and ``answer``.

    All configuration values are keyword-only and default to the values this
    script has always used, so ``GPT_generator()`` behaves as before.
    """

    def __init__(
        self,
        *,
        first_entry_file_name: str = "../../dataflow/example/GeneralTextPipeline/math_100.jsonl",
        cache_path: str = "./cache",
        api_url: str = "http://123.129.219.111:3000/v1/chat/completions",
        model_name: str = "gpt-4o",
        max_workers: int = 50,
    ) -> None:
        """Build the storage, the LLM client and the prompted generator.

        Args:
            first_entry_file_name: Path handed to ``FileStorage`` as the
                initial input file (a relative path is presumably resolved
                against the working directory -- confirm with FileStorage).
            cache_path: Directory handed to ``FileStorage`` as its cache path.
            api_url: Chat-completions endpoint passed to the LLM client.
            model_name: Model identifier passed to the LLM client.
            max_workers: Worker count passed to the LLM client.
        """
        self.storage = FileStorage(
            first_entry_file_name=first_entry_file_name,
            cache_path=cache_path,
            file_name_prefix="math_QA",
            cache_type="jsonl",
        )
        # Not read anywhere inside this class; kept so that any external code
        # relying on the attribute keeps working.
        self.model_cache_dir = './dataflow_cache'
        self.llm_serving = APILLMServing_request(
            api_url=api_url,
            model_name=model_name,
            max_workers=max_workers,
        )
        # Schema handed to the generator alongside the system prompt: three
        # required string fields and no additional properties.
        qa_schema = {
            "type": "object",
            "properties": {
                "problem": {
                    "type": "string",
                    "description": "The math problem that needs to be solved."
                },
                "solution": {
                    "type": "string",
                    "description": "Step-by-step reasoning and solution process."
                },
                "answer": {
                    "type": "string",
                    "description": "The final answer to the math problem."
                }
            },
            "required": ["problem", "solution", "answer"],
            "additionalProperties": False
        }
        self.prompt_generator = PromptedGenerator(
            llm_serving=self.llm_serving,
            system_prompt="Please solve this math problem. Answer in JSON format.",
            json_schema=qa_schema,
        )

    def forward(self) -> None:
        """Run the prompted generator once on the next storage step.

        The generator is told to use the ``"problem"`` key as its input.
        Where the generated output ends up is decided by ``PromptedGenerator``
        and the storage object, not by this method.
        """
        self.prompt_generator.run(
            storage=self.storage.step(),
            input_key="problem",
        )


if __name__ == "__main__":
    # Script entry point: construct the pipeline with its default
    # configuration, then execute its single generation step.
    pipeline = GPT_generator()
    pipeline.forward()