Commit 97e8278b authored by zzg_666's avatar zzg_666

Adapt the backend for vLLM

from dataflow.operators.reasoning import (
    ReasoningQuestionGenerator,
    ReasoningAnswerGenerator,
    ReasoningQuestionFilter,
    ReasoningAnswerNgramFilter,
    ReasoningAnswerModelJudgeFilter,
)
from dataflow.utils.storage import FileStorage
from dataflow.serving import APILLMServing_request
from dataflow.core import LLMServingABC
from dataflow.prompts.reasoning.general import (
    GeneralQuestionFilterPrompt,
    GeneralAnswerGeneratorPrompt,
    GeneralQuestionSynthesisPrompt,
)
from dataflow.prompts.model_evaluation.general import AnswerJudgePrompt


class GeneralReasoning_APIPipeline:
    def __init__(self, llm_serving: LLMServingABC = None):
        self.storage = FileStorage(
            first_entry_file_name="../example_data/ReasoningPipeline/pipeline_general.json",
            cache_path="./cache_local",
            file_name_prefix="dataflow_cache_step",
            cache_type="jsonl",
        )
        # Use an API server as the LLM serving backend unless one is injected.
        if llm_serving is None:
            llm_serving = APILLMServing_request(
                api_url="https://api.openai.com/v1/chat/completions",
                model_name="gpt-4o",
                max_workers=30
            )
        self.llm_serving = llm_serving
        self.question_filter_step1 = ReasoningQuestionFilter(
            system_prompt="You are an expert in evaluating mathematical problems. Follow the user's instructions strictly and output your final judgment in the required JSON format.",
            llm_serving=self.llm_serving,
            prompt_template=GeneralQuestionFilterPrompt()
        )
        self.question_gen_step2 = ReasoningQuestionGenerator(
            num_prompts=1,
            llm_serving=self.llm_serving,
            prompt_template=GeneralQuestionSynthesisPrompt()
        )
        self.answer_generator_step3 = ReasoningAnswerGenerator(
            llm_serving=self.llm_serving,
            prompt_template=GeneralAnswerGeneratorPrompt()
        )
        self.answer_model_judge_step4 = ReasoningAnswerModelJudgeFilter(
            llm_serving=self.llm_serving,
            prompt_template=AnswerJudgePrompt(),
            keep_all_samples=True
        )
        self.answer_ngram_filter_step5 = ReasoningAnswerNgramFilter(
            min_score=0.1,
            max_score=1.0,
            ngrams=5
        )

    def forward(self):
        self.question_filter_step1.run(
            storage=self.storage.step(),
            input_key="instruction",
        )
        self.question_gen_step2.run(
            storage=self.storage.step(),
            input_key="instruction",
        )
        self.answer_generator_step3.run(
            storage=self.storage.step(),
            input_key="instruction",
            output_key="generated_cot"
        )
        self.answer_model_judge_step4.run(
            storage=self.storage.step(),
            input_question_key="instruction",
            input_answer_key="generated_cot",
            input_reference_key="golden_answer"
        )
        self.answer_ngram_filter_step5.run(
            storage=self.storage.step(),
            input_question_key="instruction",
            input_answer_key="generated_cot"
        )


if __name__ == "__main__":
    pl = GeneralReasoning_APIPipeline()
    pl.forward()
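Per the commit message, this pipeline should also run against a vLLM backend, which is why the constructor accepts an injected serving object. A minimal sketch of that swap, assuming dataflow.serving exposes a local vLLM serving class named LocalModelLLMServing_vllm with the constructor arguments shown (both the class name and the parameters are assumptions, not confirmed by this diff):

# Hypothetical sketch: drive the same pipeline from a local vLLM model instead
# of the OpenAI-compatible API. Class name and arguments are assumptions about
# the dataflow.serving module.
from dataflow.serving import LocalModelLLMServing_vllm  # assumed class name

vllm_serving = LocalModelLLMServing_vllm(
    hf_model_name_or_path="Qwen/Qwen2.5-7B-Instruct",  # any local or HF model
    vllm_max_tokens=2048,
)
pl = GeneralReasoning_APIPipeline(llm_serving=vllm_serving)
pl.forward()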
from dataflow.operators.conversations import ConsistentChatGenerator
from dataflow.utils.storage import FileStorage
from dataflow.serving import APILLMServing_request


class TextPipeline:
    def __init__(self):
        self.storage = FileStorage(
            first_entry_file_name="",
            cache_path="./cache",
            file_name_prefix="dataflow_cache_step",
            cache_type="jsonl",
        )
        serving = APILLMServing_request(
            api_url="http://123.129.219.111:3000/v1/chat/completions",
            model_name="gpt-4o",
            max_workers=100
        )
        self.model_cache_dir = './dataflow_cache'
        self.processor = ConsistentChatGenerator(llm_serving=serving, num_dialogs_per_intent=5)

    def forward(self):
        self.processor.run(
            storage=self.storage.step()
        )


if __name__ == "__main__":
    # Test entry point for TextPipeline: run forward() to process the data
    # and generate the output.
    print("Running TextPipeline...")
    model = TextPipeline()
    model.forward()
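APILLMServing_request takes no API key in code, so the key presumably comes from the environment; the VQA pipeline below names the variable DF_API_KEY explicitly, and the same variable plausibly applies here. A small pre-flight check under that assumption:

# Hypothetical pre-flight check: fail fast if the API key is missing.
# The DF_API_KEY name is borrowed from the VQA pipeline below; that it is also
# what APILLMServing_request reads is an assumption.
import os

if "DF_API_KEY" not in os.environ:
    raise RuntimeError("Set DF_API_KEY before running the pipeline.")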
from dataflow.operators.text_sft import AlpagasusFilter, CondorGenerator, CondorRefiner
from dataflow.utils.storage import FileStorage
from dataflow.serving import APILLMServing_request


class TextPipeline:
    def __init__(self):
        self.storage = FileStorage(
            first_entry_file_name="",
            cache_path="./cache",
            file_name_prefix="dataflow_cache_step",
            cache_type="jsonl",
        )
        self.model_cache_dir = './dataflow_cache'
        self.num_generated_samples = 3
        llm_serving = APILLMServing_request(
            api_url="https://api.openai.com/v1/chat/completions",
            model_name="gpt-4o",
            max_workers=100
        )
        self.generator = CondorGenerator(llm_serving=llm_serving, num_samples=self.num_generated_samples)
        self.refiner = CondorRefiner(llm_serving=llm_serving)
        self.alpagasus_filter = AlpagasusFilter(min_score=3, max_score=5, llm_serving=llm_serving)

    def forward(self):
        self.generator.run(
            storage=self.storage.step()
        )
        self.refiner.run(
            storage=self.storage.step(),
            input_instruction_key='instruction',
            input_output_key='output'
        )
        self.alpagasus_filter.run(
            storage=self.storage.step(),
            input_instruction_key='instruction',
            input_input_key="input",
            input_output_key='output'
        )
if __name__ == "__main__":
    model = TextPipeline()
    model.forward()
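The run() calls above imply that each cached record carries instruction, input, and output fields. A quick way to inspect what the pipeline wrote, assuming the cache files land under ./cache with the configured prefix (the glob pattern is illustrative; actual step numbering depends on the run):

# Hypothetical inspection helper: print the keyed fields the filter consumes.
# Field names come from the run() calls above; the file pattern is assumed.
import glob
import json

paths = sorted(glob.glob("./cache/dataflow_cache_step*.jsonl"))
if paths:
    with open(paths[-1]) as f:
        for line in f:
            record = json.loads(line)
            print(record.get("instruction"), "->", record.get("output"))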
from dataflow.operators.text_sft import SFTGeneratorSeed
from dataflow.utils.storage import FileStorage
from dataflow.serving import APILLMServing_request


class TextPipeline:
    def __init__(self):
        self.storage = FileStorage(
            first_entry_file_name="../example_data/GeneralTextPipeline/pt_input.jsonl",
            cache_path="./cache",
            file_name_prefix="dataflow_cache_step",
            cache_type="jsonl",
        )
        self.model_cache_dir = './dataflow_cache'
        self.num_generated_samples = 3
        llm_serving = APILLMServing_request(
            api_url="https://api.openai.com/v1/chat/completions",
            model_name="gpt-4o",
            max_workers=100
        )
        self.generator = SFTGeneratorSeed(llm_serving=llm_serving, custom_prompt="Try to make the question suitable for middle school students.")

    def forward(self):
        self.generator.run(
            storage=self.storage.step()
        )
if __name__ == "__main__":
    model = TextPipeline()
    model.forward()
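SFTGeneratorSeed mines SFT pairs out of raw pretraining text, so pt_input.jsonl presumably holds one raw document per line. A sketch of building such a seed file, assuming a raw_content field (the field name is an assumption borrowed from the VQA pipeline's input_key below; check the example_data file for the actual schema):

# Hypothetical seed-file writer for pt_input.jsonl. The raw_content field name
# is an assumption, not confirmed by this diff.
import json

docs = ["Photosynthesis converts light energy into chemical energy stored in glucose."]
with open("pt_input.jsonl", "w") as f:
    for doc in docs:
        f.write(json.dumps({"raw_content": doc}) + "\n")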
from dataflow.operators.core_vision import PromptedVQAGenerator
from dataflow.serving import APIVLMServing_openai
from dataflow.utils.storage import FileStorage


class VQA_generator:
    def __init__(self):
        self.prompt = "Describe the image in detail."
        self.storage = FileStorage(
            first_entry_file_name="../example_data/VQA/pic_path.json",
            cache_path="./cache",
            file_name_prefix="vqa",
            cache_type="json",
        )
        self.llm_serving = APIVLMServing_openai(
            model_name="o4-mini",
            api_url="https://api.openai.com/v1",  # OpenAI API base URL
            key_name_of_api_key="DF_API_KEY",
        )
        self.vqa_generate = PromptedVQAGenerator(
            self.llm_serving,
            self.prompt
        )

    def forward(self):
        self.vqa_generate.run(
            storage=self.storage.step(),
            input_key="raw_content",
        )


if __name__ == "__main__":
    vqa_generator = VQA_generator()
    vqa_generator.forward()
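This pipeline reads image locations from pic_path.json through the raw_content key. A minimal sketch of such a file, assuming raw_content holds one image path per record (the schema is inferred from input_key above; check example_data/VQA/pic_path.json for the actual layout):

# Hypothetical writer for pic_path.json. Schema assumed from input_key="raw_content".
import json

records = [{"raw_content": "./images/sample_01.png"}]
with open("pic_path.json", "w") as f:
    json.dump(records, f, indent=2)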