OpenDAS / DataFlow

Commit 97e8278b, authored Dec 03, 2025 by zzg_666

Adapt to the vLLM backend

Pipeline #3071 canceled with stages
Changes: 385 · Pipelines: 1

Showing 5 changed files with 227 additions and 0 deletions (+227 −0)
dataflow/statics/playground/playground/reasoning_general_pipeline.py (+89 −0)
dataflow/statics/playground/playground/text_conversation_synthesis_pipeline.py (+33 −0)
dataflow/statics/playground/playground/text_sft_synthesis_from_scratch.py (+44 −0)
dataflow/statics/playground/playground/text_sft_synthesis_from_seed.py (+28 −0)
dataflow/statics/playground/playground/vqa.py (+33 −0)
dataflow/statics/playground/playground/reasoning_general_pipeline.py (new file, mode 0 → 100644)
from dataflow.operators.reasoning import (
    ReasoningQuestionGenerator,
    ReasoningAnswerGenerator,
)
from dataflow.operators.reasoning import (
    ReasoningQuestionFilter,
    ReasoningAnswerNgramFilter,
    ReasoningAnswerModelJudgeFilter,
)
from dataflow.utils.storage import FileStorage
from dataflow.serving import APILLMServing_request
from dataflow.core import LLMServingABC
from dataflow.prompts.reasoning.general import (
    GeneralQuestionFilterPrompt,
    GeneralAnswerGeneratorPrompt,
    GeneralQuestionSynthesisPrompt,
)
from dataflow.prompts.model_evaluation.general import AnswerJudgePrompt


class GeneralReasoning_APIPipeline():
    def __init__(self, llm_serving: LLMServingABC = None):
        self.storage = FileStorage(
            first_entry_file_name="../example_data/ReasoningPipeline/pipeline_general.json",
            cache_path="./cache_local",
            file_name_prefix="dataflow_cache_step",
            cache_type="jsonl",
        )
        # Use an API server as the LLM serving when no backend is injected.
        if llm_serving is None:
            llm_serving = APILLMServing_request(
                api_url="https://api.openai.com/v1/chat/completions",
                model_name="gpt-4o",
                max_workers=30,
            )
        self.llm_serving = llm_serving

        # Step 1: discard malformed or low-quality questions.
        self.question_filter_step1 = ReasoningQuestionFilter(
            system_prompt="You are an expert in evaluating mathematical problems. Follow the user's instructions strictly and output your final judgment in the required JSON format.",
            llm_serving=self.llm_serving,
            prompt_template=GeneralQuestionFilterPrompt(),
        )
        # Step 2: synthesize new questions from the filtered ones.
        self.question_gen_step2 = ReasoningQuestionGenerator(
            num_prompts=1,
            llm_serving=self.llm_serving,
            prompt_template=GeneralQuestionSynthesisPrompt(),
        )
        # Step 3: generate chain-of-thought answers.
        self.answer_generator_step3 = ReasoningAnswerGenerator(
            llm_serving=self.llm_serving,
            prompt_template=GeneralAnswerGeneratorPrompt(),
        )
        # Step 4: judge generated answers against the golden reference.
        self.answer_model_judge_step4 = ReasoningAnswerModelJudgeFilter(
            llm_serving=self.llm_serving,
            prompt_template=AnswerJudgePrompt(),
            keep_all_samples=True,
        )
        # Step 5: filter answers by n-gram repetition score.
        self.answer_ngram_filter_step5 = ReasoningAnswerNgramFilter(
            min_score=0.1,
            max_score=1.0,
            ngrams=5,
        )

    def forward(self):
        self.question_filter_step1.run(
            storage=self.storage.step(),
            input_key="instruction",
        )
        self.question_gen_step2.run(
            storage=self.storage.step(),
            input_key="instruction",
        )
        self.answer_generator_step3.run(
            storage=self.storage.step(),
            input_key="instruction",
            output_key="generated_cot",
        )
        self.answer_model_judge_step4.run(
            storage=self.storage.step(),
            input_question_key="instruction",
            input_answer_key="generated_cot",
            input_reference_key="golden_answer",
        )
        self.answer_ngram_filter_step5.run(
            storage=self.storage.step(),
            input_question_key="instruction",
            input_answer_key="generated_cot",
        )


if __name__ == "__main__":
    pl = GeneralReasoning_APIPipeline()
    pl.forward()
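Per the commit message, these playground scripts are being adapted to a vLLM backend. Since the constructor accepts any LLMServingABC, a local vLLM server can be injected in place of the API default. A minimal sketch, assuming dataflow.serving exposes a vLLM-backed class named LocalModelLLMServing_vllm; the class name, parameter name, and model choice below are assumptions not shown in this diff:

# Sketch: run the same pipeline against a local vLLM backend.
# LocalModelLLMServing_vllm and hf_model_name_or_path are assumed names;
# check dataflow.serving for the actual vLLM serving class.
from dataflow.serving import LocalModelLLMServing_vllm

vllm_serving = LocalModelLLMServing_vllm(
    hf_model_name_or_path="Qwen/Qwen2.5-7B-Instruct",  # hypothetical model choice
)
pl = GeneralReasoning_APIPipeline(llm_serving=vllm_serving)
pl.forward()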
dataflow/statics/playground/playground/text_conversation_synthesis_pipeline.py (new file, mode 0 → 100644)
from dataflow.operators.conversations import ConsistentChatGenerator
from dataflow.utils.storage import FileStorage
from dataflow.serving import APILLMServing_request


class TextPipeline():
    def __init__(self):
        self.storage = FileStorage(
            first_entry_file_name="",
            cache_path="./cache",
            file_name_prefix="dataflow_cache_step",
            cache_type="jsonl",
        )
        serving = APILLMServing_request(
            api_url="http://123.129.219.111:3000/v1/chat/completions",
            model_name="gpt-4o",
            max_workers=100,
        )
        self.model_cache_dir = './dataflow_cache'
        self.processor = ConsistentChatGenerator(
            llm_serving=serving,
            num_dialogs_per_intent=5,
        )

    def forward(self):
        self.processor.run(
            storage=self.storage.step()
        )


if __name__ == "__main__":
    # This is a test entry point for the TextPipeline.
    # It will run the forward method of the TextPipeline class
    # to process the data and generate the output.
    print("Running TextPipeline...")
    model = TextPipeline()
    model.forward()
dataflow/statics/playground/playground/text_sft_synthesis_from_scratch.py (new file, mode 0 → 100644)
from dataflow.operators.text_sft import AlpagasusFilter, CondorGenerator, CondorRefiner
from dataflow.utils.storage import FileStorage
from dataflow.serving import APILLMServing_request


class TextPipeline():
    def __init__(self):
        self.storage = FileStorage(
            first_entry_file_name="",
            cache_path="./cache",
            file_name_prefix="dataflow_cache_step",
            cache_type="jsonl",
        )
        self.model_cache_dir = './dataflow_cache'
        self.num_generated_samples = 3
        llm_serving = APILLMServing_request(
            api_url="https://api.openai.com/v1/chat/completions",
            model_name="gpt-4o",
            max_workers=100,
        )
        # Generate SFT samples from scratch, refine them, then filter by score.
        self.generator = CondorGenerator(
            llm_serving=llm_serving,
            num_samples=self.num_generated_samples,
        )
        self.refiner = CondorRefiner(
            llm_serving=llm_serving,
        )
        self.alpagasus_filter = AlpagasusFilter(
            min_score=3,
            max_score=5,
            llm_serving=llm_serving,
        )

    def forward(self):
        self.generator.run(
            storage=self.storage.step()
        )
        self.refiner.run(
            storage=self.storage.step(),
            input_instruction_key='instruction',
            input_output_key='output',
        )
        self.alpagasus_filter.run(
            storage=self.storage.step(),
            input_instruction_key='instruction',
            input_input_key='input',
            input_output_key='output',
        )


if __name__ == "__main__":
    model = TextPipeline()
    model.forward()
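None of the API-backed scripts in this commit pass a key explicitly. vqa.py (later in this diff) reads its key from the DF_API_KEY environment variable, so the sketch below assumes, without confirmation from this diff, that APILLMServing_request defaults to the same variable:

# Sketch: provide the API key before constructing the pipeline.
# Assumption: APILLMServing_request reads its key from DF_API_KEY,
# mirroring key_name_of_api_key="DF_API_KEY" in vqa.py below.
import os

os.environ.setdefault("DF_API_KEY", "sk-...")  # placeholder; use a real key

model = TextPipeline()
model.forward()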
dataflow/statics/playground/playground/text_sft_synthesis_from_seed.py (new file, mode 0 → 100644)
from dataflow.operators.text_sft import SFTGeneratorSeed
from dataflow.utils.storage import FileStorage
from dataflow.serving import APILLMServing_request


class TextPipeline():
    def __init__(self):
        self.storage = FileStorage(
            first_entry_file_name="../example_data/GeneralTextPipeline/pt_input.jsonl",
            cache_path="./cache",
            file_name_prefix="dataflow_cache_step",
            cache_type="jsonl",
        )
        self.model_cache_dir = './dataflow_cache'
        self.num_generated_samples = 3
        llm_serving = APILLMServing_request(
            api_url="https://api.openai.com/v1/chat/completions",
            model_name="gpt-4o",
            max_workers=100,
        )
        # Synthesize SFT samples from seed documents, steered by a custom prompt.
        self.generator = SFTGeneratorSeed(
            llm_serving=llm_serving,
            custom_prompt="Try to make the question suitable for middle school students.",
        )

    def forward(self):
        self.generator.run(
            storage=self.storage.step()
        )


if __name__ == "__main__":
    model = TextPipeline()
    model.forward()
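For reference, a hypothetical shape for the seed corpus this pipeline reads (../example_data/GeneralTextPipeline/pt_input.jsonl): pretraining-style raw text from which instruction/response pairs are synthesized. The "raw_content" field name below is an assumption; the diff does not show which key SFTGeneratorSeed consumes.

# Sketch: write a one-row seed file in the assumed format.
import json

row = {"raw_content": "Photosynthesis converts light energy into chemical energy stored in glucose."}
with open("pt_input.jsonl", "w", encoding="utf-8") as f:
    f.write(json.dumps(row) + "\n")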
dataflow/statics/playground/playground/vqa.py (new file, mode 0 → 100644)
from dataflow.operators.core_vision import PromptedVQAGenerator
from dataflow.serving import APIVLMServing_openai
from dataflow.utils.storage import FileStorage


class VQA_generator():
    def __init__(self):
        self.prompt = "Describe the image in detail."
        self.storage = FileStorage(
            first_entry_file_name="../example_data/VQA/pic_path.json",
            cache_path="./cache",
            file_name_prefix="vqa",
            cache_type="json",
        )
        self.llm_serving = APIVLMServing_openai(
            model_name="o4-mini",
            api_url="https://api.openai.com/v1",  # OpenAI API URL
            key_name_of_api_key="DF_API_KEY",     # env var holding the API key
        )
        self.vqa_generate = PromptedVQAGenerator(
            self.llm_serving,
            self.prompt,
        )

    def forward(self):
        self.vqa_generate.run(
            storage=self.storage.step(),
            input_key="raw_content",
        )


if __name__ == "__main__":
    vqa_generator = VQA_generator()
    vqa_generator.forward()
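The run() call above reads the "raw_content" key from the seed file (../example_data/VQA/pic_path.json). A hypothetical example of that file, assuming from its name that each row holds an image path (the schema is not shown in this diff):

# Sketch: write an assumed-format seed file for the VQA pipeline.
import json

rows = [
    {"raw_content": "./images/example_photo.jpg"},   # hypothetical image paths
    {"raw_content": "./images/example_chart.png"},
]
with open("pic_path.json", "w", encoding="utf-8") as f:
    json.dump(rows, f, indent=2)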