"docs/source/vscode:/vscode.git/clone" did not exist on "6a3d1a98e031a09e7783134ae76dc6f8a358d568"
Unverified commit 0ba15ced, authored by Aymeric Roucher, committed by GitHub
Browse files

Reboot Agents (#30387)



* Create CodeAgent and ReactAgent

* Fix formatting errors

* Update documentation for agents

* Add custom errors, improve logging

* Support variable usage in ReactAgent

* add messages

* Add message passing format

* Create React Code Agent

* Update

* Refactoring

* Fix errors

* Improve python interpreter

* Only non-tensor inputs should be sent to device

* Calculator tool slight refactor

* Improve docstrings

* Refactor

* Fix tests

* Fix more tests

* Fix even more tests

* Fix tests by replacing output and input types

* Fix operand type issue

* two small fixes

* EM TTS

* Fix agent running type errors

* Change text to speech tests to allow changed outputs

* Update doc with new agent types

* Improve code interpreter

* If max iterations reached, provide a real answer instead of an error

* Add edge case in interpreter

* Add safe imports to the interpreter

* Interpreter tweaks: tuples and listcomp

* Make style

* Make quality

* Add dictcomp to interpreter

* Rename ReactJSONAgent to ReactJsonAgent

* Misc changes

* ToolCollection

* Rename agent's logger to self.logger

* Add while loops to interpreter

* Update doc with new tools. Still need to mention collections

* Add collections to the doc

* Small fixes on logs and interpreter

* Fix toolbox return type

* Docs + fixup

* Skip doctests

* Correct prompts with improved examples and formatting

* Update prompt

* Remove outdated docs

* Change agent to accept Toolbox object for tools

* Remove calculator tool

* Propagate removal of calculator in doc

* Fix 2 failing workflows

* Simplify additional argument passing

* AgentType audio

* Minor changes: function name, types

* Remove calculator tests

* Fix test

* Fix torch requirement

* Fix final answer tests

* Style fixes

* Fix tests

* Update docstrings with calculator removal

* Small type hint fixes

* Update tests/agents/test_translation.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update tests/agents/test_python_interpreter.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update src/transformers/agents/default_tools.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update src/transformers/agents/tools.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update tests/agents/test_agents.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update src/transformers/models/bert/configuration_bert.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update src/transformers/agents/tools.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update src/transformers/agents/speech_to_text.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update tests/agents/test_speech_to_text.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update tests/agents/test_tools_common.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* pygments

* Answer comments

* Cleaning up

* Simplifying init for all agents

* Improving prompts and making code nicer

* Style fixes

* Add multiple comparator test in interpreter

* Style fixes

* Improve BERT example in documentation

* Add examples to doc

* Fix python interpreter quality

* Logging improvements

* Change test flag to agents

* Quality fix

* Add example for HfEngine

* Improve conversation example for HfEngine

* typo fix

* Verify doc

* Update docs/source/en/agents.md
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update src/transformers/agents/agents.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update src/transformers/agents/prompts.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update src/transformers/agents/python_interpreter.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update docs/source/en/agents.md
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Fix style issues

* local s2t tool

---------
Co-authored-by: Cyril Kondratenko <kkn1993@gmail.com>
Co-authored-by: Lysandre <lysandre@huggingface.co>
Co-authored-by: Lysandre <lysandre.debut@reseau.eseo.fr>
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
parent 3733391c
#!/usr/bin/env python #!/usr/bin/env python
# coding=utf-8 # coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved. # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -14,28 +14,26 @@ ...@@ -14,28 +14,26 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from ..models.whisper import WhisperForConditionalGeneration, WhisperProcessor from ..models.whisper import WhisperForConditionalGeneration, WhisperProcessor
from .base import PipelineTool from .tools import PipelineTool
class SpeechToTextTool(PipelineTool): class SpeechToTextTool(PipelineTool):
default_checkpoint = "openai/whisper-base" default_checkpoint = "distil-whisper/distil-large-v3"
description = ( description = "This is a tool that transcribes an audio into text. It returns the transcribed text."
"This is a tool that transcribes an audio into text. It takes an input named `audio` and returns the "
"transcribed text."
)
name = "transcriber" name = "transcriber"
pre_processor_class = WhisperProcessor pre_processor_class = WhisperProcessor
model_class = WhisperForConditionalGeneration model_class = WhisperForConditionalGeneration
inputs = ["audio"] inputs = {"audio": {"type": "audio", "description": "The audio to transcribe"}}
outputs = ["text"] output_type = "text"
def encode(self, audio): def encode(self, audio):
return self.pre_processor(audio, return_tensors="pt").input_features return self.pre_processor(audio, return_tensors="pt")
def forward(self, inputs): def forward(self, inputs):
return self.model.generate(inputs=inputs) return self.model.generate(inputs["input_features"])
def decode(self, outputs): def decode(self, outputs):
return self.pre_processor.batch_decode(outputs, skip_special_tokens=True)[0] return self.pre_processor.batch_decode(outputs, skip_special_tokens=True)[0]
#!/usr/bin/env python #!/usr/bin/env python
# coding=utf-8 # coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved. # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -14,11 +14,12 @@ ...@@ -14,11 +14,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import torch import torch
from ..models.speecht5 import SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Processor from ..models.speecht5 import SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Processor
from ..utils import is_datasets_available from ..utils import is_datasets_available
from .base import PipelineTool from .tools import PipelineTool
if is_datasets_available(): if is_datasets_available():
...@@ -28,16 +29,15 @@ if is_datasets_available(): ...@@ -28,16 +29,15 @@ if is_datasets_available():
class TextToSpeechTool(PipelineTool): class TextToSpeechTool(PipelineTool):
default_checkpoint = "microsoft/speecht5_tts" default_checkpoint = "microsoft/speecht5_tts"
description = ( description = (
"This is a tool that reads an English text out loud. It takes an input named `text` which should contain the " "This is a tool that reads an English text out loud. It returns a waveform object containing the sound."
"text to read (in English) and returns a waveform object containing the sound."
) )
name = "text_reader" name = "text_to_speech"
pre_processor_class = SpeechT5Processor pre_processor_class = SpeechT5Processor
model_class = SpeechT5ForTextToSpeech model_class = SpeechT5ForTextToSpeech
post_processor_class = SpeechT5HifiGan post_processor_class = SpeechT5HifiGan
inputs = ["text"] inputs = {"text": {"type": "text", "description": "The text to read out loud (in English)"}}
outputs = ["audio"] output_type = "audio"
def setup(self): def setup(self):
if self.post_processor is None: if self.post_processor is None:
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from ..models.auto import AutoModelForSeq2SeqLM, AutoTokenizer from ..models.auto import AutoModelForSeq2SeqLM, AutoTokenizer
from .base import PipelineTool from .tools import PipelineTool
LANGUAGE_CODES = { LANGUAGE_CODES = {
...@@ -231,27 +231,35 @@ class TranslationTool(PipelineTool): ...@@ -231,27 +231,35 @@ class TranslationTool(PipelineTool):
Example: Example:
```py ```py
from transformers.tools import TranslationTool from transformers.agents import TranslationTool
translator = TranslationTool() translator = TranslationTool()
translator("This is a super nice API!", src_lang="English", tgt_lang="French") translator("This is a super nice API!", src_lang="English", tgt_lang="French")
``` ```
""" """
lang_to_code = LANGUAGE_CODES
default_checkpoint = "facebook/nllb-200-distilled-600M" default_checkpoint = "facebook/nllb-200-distilled-600M"
description = ( description = (
"This is a tool that translates text from a language to another. It takes three inputs: `text`, which should " "This is a tool that translates text from a language to another."
"be the text to translate, `src_lang`, which should be the language of the text to translate and `tgt_lang`, " f"Both `src_lang`and `tgt_lang` should belong to this list of languages: {list(lang_to_code.keys())}."
"which should be the language for the desired ouput language. Both `src_lang` and `tgt_lang` are written in "
"plain English, such as 'Romanian', or 'Albanian'. It returns the text translated in `tgt_lang`."
) )
name = "translator" name = "translator"
pre_processor_class = AutoTokenizer pre_processor_class = AutoTokenizer
model_class = AutoModelForSeq2SeqLM model_class = AutoModelForSeq2SeqLM
lang_to_code = LANGUAGE_CODES
inputs = ["text", "text", "text"] inputs = {
outputs = ["text"] "text": {"type": "text", "description": "The text to translate"},
"src_lang": {
"type": "text",
"description": "The language of the text to translate. Written in plain English, such as 'Romanian', or 'Albanian'",
},
"tgt_lang": {
"type": "text",
"description": "The language for the desired ouput language. Written in plain English, such as 'Romanian', or 'Albanian'",
},
}
output_type = "text"
def encode(self, text, src_lang, tgt_lang): def encode(self, text, src_lang, tgt_lang):
if src_lang not in self.lang_to_code: if src_lang not in self.lang_to_code:
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment