"tests/vscode:/vscode.git/clone" did not exist on "ab79863e6c4f4df652328af6901be2ee208dacec"
Unverified commit a2bd09c9, authored by Chauncey, committed by GitHub
Browse files

[Bugfix] [Reasoning] Add reasoning_start_str/reasoning_end_str properties to...


[Bugfix] [Reasoning] Add reasoning_start_str/reasoning_end_str properties to reasoning parsers (#40566)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent 12367487
......@@ -40,6 +40,14 @@ class DeepSeekV3ReasoningParser(ReasoningParser):
else:
self._parser = IdentityReasoningParser(tokenizer, *args, **kwargs)
@property
def reasoning_start_str(self) -> str | None:
return self._parser.reasoning_start_str
@property
def reasoning_end_str(self) -> str | None:
return self._parser.reasoning_end_str
def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
    """Return the wrapped parser's verdict on whether reasoning has ended.

    ``input_ids`` is forwarded unchanged to ``self._parser``.
    """
    return self._parser.is_reasoning_end(input_ids)
......
......@@ -33,6 +33,14 @@ class IdentityReasoningParser(ReasoningParser):
"constructor during construction."
)
@property
def reasoning_start_str(self) -> str | None:
return None
@property
def reasoning_end_str(self) -> str | None:
return None
def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
    """Return ``True`` unconditionally; ``input_ids`` is not inspected."""
    # Always return True, since we never treat reasoning specially
    return True
......
......@@ -65,6 +65,14 @@ class KimiK2ReasoningParser(ReasoningParser):
"tokens in the tokenizer!"
)
@property
def reasoning_start_str(self) -> str | None:
return self._start_token
@property
def reasoning_end_str(self) -> str | None:
return self._end_token
def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
"""
Check if the reasoning content ends in the input_ids.
......
......@@ -237,6 +237,14 @@ class Olmo3ReasoningParser(ReasoningParser):
think_start=self.think_start, think_end=self.think_end
)
@property
def reasoning_start_str(self) -> str:
    """Reasoning start marker: the value stored in ``self.think_start``."""
    return self.think_start
@property
def reasoning_end_str(self) -> str:
    """Reasoning end marker: the value stored in ``self.think_end``."""
    return self.think_end
def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
    """Return whether the decoded ``input_ids`` contain ``self.think_end``.

    The ids are decoded with ``self.model_tokenizer.decode`` and the result
    is searched for the end marker as a plain substring.
    """
    decoded = self.model_tokenizer.decode(input_ids)
    return self.think_end in decoded
......
......@@ -29,6 +29,7 @@ class Step3ReasoningParser(ReasoningParser):
def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
super().__init__(tokenizer, *args, **kwargs)
self.think_start_token = "<think>"
self.think_end_token = "</think>"
self.reasoning_regex = re.compile(rf"(.*?){self.think_end_token}", re.DOTALL)
......@@ -47,6 +48,14 @@ class Step3ReasoningParser(ReasoningParser):
)
self.think_end_token_id: int = think_end_token_id
@property
def reasoning_start_str(self) -> str:
    """Reasoning start marker: the value of ``self.think_start_token``."""
    return self.think_start_token
@property
def reasoning_end_str(self) -> str:
    """Reasoning end marker: the value of ``self.think_end_token``."""
    return self.think_end_token
def extract_reasoning_streaming(
self,
previous_text: str,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment