from dataflow.operators.core_text import PromptedGenerator
from dataflow.serving import APILLMServing_request, LiteLLMServing
from dataflow.utils.storage import FileStorage


class GPT_generator():
    def __init__(self):
        # Storage backend: reads the source JSONL and caches intermediate results.
        self.storage = FileStorage(
            first_entry_file_name="../../dataflow/example/GeneralTextPipeline/translation.jsonl",
            cache_path="./cache",
            file_name_prefix="translation",
            cache_type="jsonl",
        )
        self.model_cache_dir = './dataflow_cache'
        # LLM serving backend: sends requests to the OpenAI chat completions API.
        self.llm_serving = APILLMServing_request(
            api_url="https://api.openai.com/v1/chat/completions",
            model_name="gpt-5",
            max_workers=10,
            # custom_llm_provider="openai",  # if you are using a custom LLM provider's API
        )
        # Prompted generator: translates each record to Chinese and constrains the
        # output to a JSON object holding the original text and its translation.
        self.prompt_generator = PromptedGenerator(
            llm_serving=self.llm_serving,
            system_prompt="Please translate to Chinese. Please answer in JSON format.",
            json_schema={
                "type": "object",
                "properties": {
                    "original": {"type": "string"},
                    "translation": {"type": "string"},
                },
                "required": ["original", "translation"],
                "additionalProperties": False,
            },
        )

    def forward(self):
        # Translation step: run the generator over the "raw_content" field of each record.
        self.prompt_generator.run(
            storage=self.storage.step(),
            input_key="raw_content",
        )


if __name__ == "__main__":
    # Entry point for the pipeline.
    model = GPT_generator()
    model.forward()
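
# Alternative serving backend (a minimal sketch): LiteLLMServing is imported above
# and can stand in for APILLMServing_request to route requests through LiteLLM
# instead of calling the OpenAI endpoint directly. The keyword arguments below are
# assumptions for illustration, not verified against the LiteLLMServing signature;
# check dataflow.serving for the exact parameters before using.
#
#     self.llm_serving = LiteLLMServing(
#         model="openai/gpt-5",   # assumed LiteLLM-style "provider/model" identifier
#         max_workers=10,         # assumed to mirror APILLMServing_request
#     )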