from dataflow.operators.core_text import PromptedGenerator
from dataflow.serving import APILLMServing_request, LiteLLMServing
from dataflow.utils.storage import FileStorage


class GPT_generator():
    def __init__(self):
        # Storage backend: reads the source JSONL and caches intermediate results.
        self.storage = FileStorage(
            first_entry_file_name="../../dataflow/example/GeneralTextPipeline/translation.jsonl",
            cache_path="./cache",
            file_name_prefix="translation",
            cache_type="jsonl",
        )
        self.model_cache_dir = './dataflow_cache'
        # LLM serving backend: sends requests to the OpenAI chat completions API.
        self.llm_serving = APILLMServing_request(
            api_url="https://api.openai.com/v1/chat/completions",
            model_name="gpt-5",
            max_workers=10,
            # custom_llm_provider="openai",  # if you are using a custom LLM provider's API
        )
        # Prompted generator: translates each record to Chinese and constrains the
        # output to a JSON object holding the original text and its translation.
        self.prompt_generator = PromptedGenerator(
            llm_serving=self.llm_serving,
            system_prompt="Please translate to Chinese. Please answer in JSON format.",
            json_schema={
                "type": "object",
                "properties": {
                    "original": {"type": "string"},
                    "translation": {"type": "string"},
                },
                "required": ["original", "translation"],
                "additionalProperties": False,
            },
        )

    def forward(self):
        # Translation step: run the generator over the "raw_content" field of each record.
        self.prompt_generator.run(
            storage=self.storage.step(),
            input_key="raw_content",
        )


if __name__ == "__main__":
    # Entry point for the pipeline.
    model = GPT_generator()
    model.forward()
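
# Alternative serving backend (a minimal sketch): LiteLLMServing is imported above
# and can stand in for APILLMServing_request to route requests through LiteLLM
# instead of calling the OpenAI endpoint directly. The keyword arguments below are
# assumptions for illustration, not verified against the LiteLLMServing signature;
# check dataflow.serving for the exact parameters before using.
#
#     self.llm_serving = LiteLLMServing(
#         model="openai/gpt-5",   # assumed LiteLLM-style "provider/model" identifier
#         max_workers=10,         # assumed to mirror APILLMServing_request
#     )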