Commit 97e8278b authored by zzg_666's avatar zzg_666

Adapt the backend for vLLM

from dataflow.operators.reasoning import (
    ReasoningQuestionGenerator,
    ReasoningAnswerGenerator,
    ReasoningQuestionFilter,
    ReasoningAnswerNgramFilter,
    ReasoningAnswerModelJudgeFilter,
)
from dataflow.utils.storage import FileStorage
from dataflow.serving import APILLMServing_request
from dataflow.core import LLMServingABC
from dataflow.prompts.reasoning.general import (
    GeneralQuestionFilterPrompt,
    GeneralAnswerGeneratorPrompt,
    GeneralQuestionSynthesisPrompt,
)
from dataflow.prompts.model_evaluation.general import AnswerJudgePrompt


class GeneralReasoning_APIPipeline:
    def __init__(self, llm_serving: LLMServingABC = None):
        self.storage = FileStorage(
            first_entry_file_name="../example_data/ReasoningPipeline/pipeline_general.json",
            cache_path="./cache_local",
            file_name_prefix="dataflow_cache_step",
            cache_type="jsonl",
        )
        # Use an API server as the LLM serving backend unless one is injected.
        if llm_serving is None:
            llm_serving = APILLMServing_request(
                api_url="https://api.openai.com/v1/chat/completions",
                model_name="gpt-4o",
                max_workers=30
            )
        self.llm_serving = llm_serving
        self.question_filter_step1 = ReasoningQuestionFilter(
            system_prompt="You are an expert in evaluating mathematical problems. Follow the user's instructions strictly and output your final judgment in the required JSON format.",
            llm_serving=self.llm_serving,
            prompt_template=GeneralQuestionFilterPrompt()
        )
        self.question_gen_step2 = ReasoningQuestionGenerator(
            num_prompts=1,
            llm_serving=self.llm_serving,
            prompt_template=GeneralQuestionSynthesisPrompt()
        )
        self.answer_generator_step3 = ReasoningAnswerGenerator(
            llm_serving=self.llm_serving,
            prompt_template=GeneralAnswerGeneratorPrompt()
        )
        self.answer_model_judge_step4 = ReasoningAnswerModelJudgeFilter(
            llm_serving=self.llm_serving,
            prompt_template=AnswerJudgePrompt(),
            keep_all_samples=True
        )
        self.answer_ngram_filter_step5 = ReasoningAnswerNgramFilter(
            min_score=0.1,
            max_score=1.0,
            ngrams=5
        )

    def forward(self):
        self.question_filter_step1.run(
            storage=self.storage.step(),
            input_key="instruction",
        )
        self.question_gen_step2.run(
            storage=self.storage.step(),
            input_key="instruction",
        )
        self.answer_generator_step3.run(
            storage=self.storage.step(),
            input_key="instruction",
            output_key="generated_cot"
        )
        self.answer_model_judge_step4.run(
            storage=self.storage.step(),
            input_question_key="instruction",
            input_answer_key="generated_cot",
            input_reference_key="golden_answer"
        )
        self.answer_ngram_filter_step5.run(
            storage=self.storage.step(),
            input_question_key="instruction",
            input_answer_key="generated_cot"
        )


if __name__ == "__main__":
    pl = GeneralReasoning_APIPipeline()
    pl.forward()
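Per the commit message, this pipeline should also run against a vLLM backend, which is why the constructor accepts an injected serving object. A minimal sketch of that swap, assuming dataflow.serving exposes a local vLLM serving class named LocalModelLLMServing_vllm with the constructor arguments shown (both the class name and the parameters are assumptions, not confirmed by this diff):

# Hypothetical sketch: drive the same pipeline from a local vLLM model instead
# of the OpenAI-compatible API. Class name and arguments are assumptions about
# the dataflow.serving module.
from dataflow.serving import LocalModelLLMServing_vllm  # assumed class name

vllm_serving = LocalModelLLMServing_vllm(
    hf_model_name_or_path="Qwen/Qwen2.5-7B-Instruct",  # any local or HF model
    vllm_max_tokens=2048,
)
pl = GeneralReasoning_APIPipeline(llm_serving=vllm_serving)
pl.forward()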
from dataflow.operators.conversations import ConsistentChatGenerator
from dataflow.utils.storage import FileStorage
from dataflow.serving import APILLMServing_request


class TextPipeline:
    def __init__(self):
        self.storage = FileStorage(
            first_entry_file_name="",
            cache_path="./cache",
            file_name_prefix="dataflow_cache_step",
            cache_type="jsonl",
        )
        serving = APILLMServing_request(
            api_url="http://123.129.219.111:3000/v1/chat/completions",
            model_name="gpt-4o",
            max_workers=100
        )
        self.model_cache_dir = './dataflow_cache'
        self.processor = ConsistentChatGenerator(llm_serving=serving, num_dialogs_per_intent=5)

    def forward(self):
        self.processor.run(
            storage=self.storage.step()
        )


if __name__ == "__main__":
    # Test entry point for TextPipeline: run forward() to process the data
    # and generate the output.
    print("Running TextPipeline...")
    model = TextPipeline()
    model.forward()
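APILLMServing_request takes no API key in code, so the key presumably comes from the environment; the VQA pipeline below names the variable DF_API_KEY explicitly, and the same variable plausibly applies here. A small pre-flight check under that assumption:

# Hypothetical pre-flight check: fail fast if the API key is missing.
# The DF_API_KEY name is borrowed from the VQA pipeline below; that it is also
# what APILLMServing_request reads is an assumption.
import os

if "DF_API_KEY" not in os.environ:
    raise RuntimeError("Set DF_API_KEY before running the pipeline.")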
from dataflow.operators.text_sft import AlpagasusFilter, CondorGenerator, CondorRefiner
from dataflow.utils.storage import FileStorage
from dataflow.serving import APILLMServing_request


class TextPipeline:
    def __init__(self):
        self.storage = FileStorage(
            first_entry_file_name="",
            cache_path="./cache",
            file_name_prefix="dataflow_cache_step",
            cache_type="jsonl",
        )
        self.model_cache_dir = './dataflow_cache'
        self.num_generated_samples = 3
        llm_serving = APILLMServing_request(
            api_url="https://api.openai.com/v1/chat/completions",
            model_name="gpt-4o",
            max_workers=100
        )
        self.generator = CondorGenerator(llm_serving=llm_serving, num_samples=self.num_generated_samples)
        self.refiner = CondorRefiner(llm_serving=llm_serving)
        self.alpagasus_filter = AlpagasusFilter(min_score=3, max_score=5, llm_serving=llm_serving)

    def forward(self):
        self.generator.run(
            storage=self.storage.step()
        )
        self.refiner.run(
            storage=self.storage.step(),
            input_instruction_key='instruction',
            input_output_key='output'
        )
        self.alpagasus_filter.run(
            storage=self.storage.step(),
            input_instruction_key='instruction',
            input_input_key="input",
            input_output_key='output'
        )
if __name__ == "__main__":
    model = TextPipeline()
    model.forward()
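The run() calls above imply that each cached record carries instruction, input, and output fields. A quick way to inspect what the pipeline wrote, assuming the cache files land under ./cache with the configured prefix (the glob pattern is illustrative; actual step numbering depends on the run):

# Hypothetical inspection helper: print the keyed fields the filter consumes.
# Field names come from the run() calls above; the file pattern is assumed.
import glob
import json

paths = sorted(glob.glob("./cache/dataflow_cache_step*.jsonl"))
if paths:
    with open(paths[-1]) as f:
        for line in f:
            record = json.loads(line)
            print(record.get("instruction"), "->", record.get("output"))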
from dataflow.operators.text_sft import SFTGeneratorSeed
from dataflow.utils.storage import FileStorage
from dataflow.serving import APILLMServing_request


class TextPipeline:
    def __init__(self):
        self.storage = FileStorage(
            first_entry_file_name="../example_data/GeneralTextPipeline/pt_input.jsonl",
            cache_path="./cache",
            file_name_prefix="dataflow_cache_step",
            cache_type="jsonl",
        )
        self.model_cache_dir = './dataflow_cache'
        self.num_generated_samples = 3
        llm_serving = APILLMServing_request(
            api_url="https://api.openai.com/v1/chat/completions",
            model_name="gpt-4o",
            max_workers=100
        )
        self.generator = SFTGeneratorSeed(llm_serving=llm_serving, custom_prompt="Try to make the question suitable for middle school students.")

    def forward(self):
        self.generator.run(
            storage=self.storage.step()
        )
if __name__ == "__main__":
    model = TextPipeline()
    model.forward()
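SFTGeneratorSeed mines SFT pairs out of raw pretraining text, so pt_input.jsonl presumably holds one raw document per line. A sketch of building such a seed file, assuming a raw_content field (the field name is an assumption borrowed from the VQA pipeline's input_key below; check the example_data file for the actual schema):

# Hypothetical seed-file writer for pt_input.jsonl. The raw_content field name
# is an assumption, not confirmed by this diff.
import json

docs = ["Photosynthesis converts light energy into chemical energy stored in glucose."]
with open("pt_input.jsonl", "w") as f:
    for doc in docs:
        f.write(json.dumps({"raw_content": doc}) + "\n")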
from dataflow.operators.core_vision import PromptedVQAGenerator
from dataflow.serving import APIVLMServing_openai
from dataflow.utils.storage import FileStorage


class VQA_generator:
    def __init__(self):
        self.prompt = "Describe the image in detail."
        self.storage = FileStorage(
            first_entry_file_name="../example_data/VQA/pic_path.json",
            cache_path="./cache",
            file_name_prefix="vqa",
            cache_type="json",
        )
        self.llm_serving = APIVLMServing_openai(
            model_name="o4-mini",
            api_url="https://api.openai.com/v1",  # OpenAI API base URL
            key_name_of_api_key="DF_API_KEY",
        )
        self.vqa_generate = PromptedVQAGenerator(
            self.llm_serving,
            self.prompt
        )

    def forward(self):
        self.vqa_generate.run(
            storage=self.storage.step(),
            input_key="raw_content",
        )


if __name__ == "__main__":
    vqa_generator = VQA_generator()
    vqa_generator.forward()
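This pipeline reads image locations from pic_path.json through the raw_content key. A minimal sketch of such a file, assuming raw_content holds one image path per record (the schema is inferred from input_key above; check example_data/VQA/pic_path.json for the actual layout):

# Hypothetical writer for pic_path.json. Schema assumed from input_key="raw_content".
import json

records = [{"raw_content": "./images/sample_01.png"}]
with open("pic_path.json", "w") as f:
    json.dump(records, f, indent=2)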