Unverified commit a2bd09c9, authored by Chauncey and committed by GitHub
Browse files

[Bugfix] [Reasoning] Add reasoning_start_str/reasoning_end_str properties to...


[Bugfix] [Reasoning] Add reasoning_start_str/reasoning_end_str properties to reasoning parsers (#40566)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent 12367487
...@@ -40,6 +40,14 @@ class DeepSeekV3ReasoningParser(ReasoningParser): ...@@ -40,6 +40,14 @@ class DeepSeekV3ReasoningParser(ReasoningParser):
else: else:
self._parser = IdentityReasoningParser(tokenizer, *args, **kwargs) self._parser = IdentityReasoningParser(tokenizer, *args, **kwargs)
@property
def reasoning_start_str(self) -> str | None:
return self._parser.reasoning_start_str
@property
def reasoning_end_str(self) -> str | None:
return self._parser.reasoning_end_str
def is_reasoning_end(self, input_ids: Sequence[int]) -> bool: def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
return self._parser.is_reasoning_end(input_ids) return self._parser.is_reasoning_end(input_ids)
......
...@@ -33,6 +33,14 @@ class IdentityReasoningParser(ReasoningParser): ...@@ -33,6 +33,14 @@ class IdentityReasoningParser(ReasoningParser):
"constructor during construction." "constructor during construction."
) )
@property
def reasoning_start_str(self) -> str | None:
return None
@property
def reasoning_end_str(self) -> str | None:
return None
def is_reasoning_end(self, input_ids: Sequence[int]) -> bool: def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
# Always return True, since we never treat reasoning specially # Always return True, since we never treat reasoning specially
return True return True
......
...@@ -65,6 +65,14 @@ class KimiK2ReasoningParser(ReasoningParser): ...@@ -65,6 +65,14 @@ class KimiK2ReasoningParser(ReasoningParser):
"tokens in the tokenizer!" "tokens in the tokenizer!"
) )
@property
def reasoning_start_str(self) -> str | None:
return self._start_token
@property
def reasoning_end_str(self) -> str | None:
return self._end_token
def is_reasoning_end(self, input_ids: Sequence[int]) -> bool: def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
""" """
Check if the reasoning content ends in the input_ids. Check if the reasoning content ends in the input_ids.
......
...@@ -237,6 +237,14 @@ class Olmo3ReasoningParser(ReasoningParser): ...@@ -237,6 +237,14 @@ class Olmo3ReasoningParser(ReasoningParser):
think_start=self.think_start, think_end=self.think_end think_start=self.think_start, think_end=self.think_end
) )
@property
def reasoning_start_str(self) -> str:
    """Return the reasoning start marker stored in ``self.think_start``."""
    start_marker = self.think_start
    return start_marker
@property
def reasoning_end_str(self) -> str:
    """Return the reasoning end marker stored in ``self.think_end``."""
    end_marker = self.think_end
    return end_marker
def is_reasoning_end(self, input_ids: Sequence[int]) -> bool: def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
text = self.model_tokenizer.decode(input_ids) text = self.model_tokenizer.decode(input_ids)
return self.think_end in text return self.think_end in text
......
...@@ -29,6 +29,7 @@ class Step3ReasoningParser(ReasoningParser): ...@@ -29,6 +29,7 @@ class Step3ReasoningParser(ReasoningParser):
def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs): def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
super().__init__(tokenizer, *args, **kwargs) super().__init__(tokenizer, *args, **kwargs)
self.think_start_token = "<think>"
self.think_end_token = "</think>" self.think_end_token = "</think>"
self.reasoning_regex = re.compile(rf"(.*?){self.think_end_token}", re.DOTALL) self.reasoning_regex = re.compile(rf"(.*?){self.think_end_token}", re.DOTALL)
...@@ -47,6 +48,14 @@ class Step3ReasoningParser(ReasoningParser): ...@@ -47,6 +48,14 @@ class Step3ReasoningParser(ReasoningParser):
) )
self.think_end_token_id: int = think_end_token_id self.think_end_token_id: int = think_end_token_id
@property
def reasoning_start_str(self) -> str:
    """Return the reasoning start marker stored in ``self.think_start_token``."""
    start_marker = self.think_start_token
    return start_marker
@property
def reasoning_end_str(self) -> str:
    """Return the reasoning end marker stored in ``self.think_end_token``."""
    end_marker = self.think_end_token
    return end_marker
def extract_reasoning_streaming( def extract_reasoning_streaming(
self, self,
previous_text: str, previous_text: str,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment