Alternative agent plan (#32295)

* new agent plan * plan type assertion * style corrections * better prompt naming * make fixup

Alternative agent plan (#32295)
* new agent plan * plan type assertion * style corrections * better prompt naming * make fixup
bd54ed2e · plaggy · GitHub · e68ec18c · bd54ed2e · bd54ed2e
Unverified Commit bd54ed2e authored Jul 30, 2024 by plaggy Committed by GitHub Jul 30, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 319 additions and 9 deletions

src/transformers/agents/agents.py src/transformers/agents/agents.py +14 -9

src/transformers/agents/prompts.py src/transformers/agents/prompts.py +305 -0

No files found.
--- a/src/transformers/agents/agents.py
+++ b/src/transformers/agents/agents.py
@@ -17,7 +17,7 @@
 import json
 import logging
 import re
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union

 from .. import is_torch_available
 from ..utils import logging as transformers_logging
@@ -30,13 +30,12 @@ from .prompts import (
    DEFAULT_REACT_CODE_SYSTEM_PROMPT,
    DEFAULT_REACT_JSON_SYSTEM_PROMPT,
    PLAN_UPDATE_FINAL_PLAN_REDACTION,
+    PROMPTS_FOR_INITIAL_PLAN,
+    PROMPTS_FOR_PLAN_UPDATE,
+    SUPPORTED_PLAN_TYPES,
    SYSTEM_PROMPT_FACTS,
    SYSTEM_PROMPT_FACTS_UPDATE,
-    SYSTEM_PROMPT_PLAN,
-    SYSTEM_PROMPT_PLAN_UPDATE,
    USER_PROMPT_FACTS_UPDATE,
-    USER_PROMPT_PLAN,
-    USER_PROMPT_PLAN_UPDATE,
 )
 from .python_interpreter import LIST_SAFE_MODULES, evaluate_python_code
 from .tools import (
@@ -653,9 +652,11 @@ class ReactAgent(Agent):
        llm_engine: Callable = HfEngine(),
        system_prompt: str = DEFAULT_REACT_CODE_SYSTEM_PROMPT,
        tool_description_template: str = DEFAULT_TOOL_DESCRIPTION_TEMPLATE,
+        plan_type: Literal[tuple(SUPPORTED_PLAN_TYPES)] = SUPPORTED_PLAN_TYPES[0],
        planning_interval: Optional[int] = None,
        **kwargs,
    ):
+        assert plan_type in SUPPORTED_PLAN_TYPES, f"plan type {plan_type} is not supported"
        super().__init__(
            tools=tools,
            llm_engine=llm_engine,
@@ -664,6 +665,7 @@ class ReactAgent(Agent):
            **kwargs,
        )
        self.planning_interval = planning_interval
+        self.plan_type = plan_type

    def provide_final_answer(self, task) -> str:
        """
@@ -794,10 +796,13 @@ Now begin!""",

            answer_facts = self.llm_engine([message_prompt_facts, message_prompt_task])

-            message_system_prompt_plan = {"role": MessageRole.SYSTEM, "content": SYSTEM_PROMPT_PLAN}
+            message_system_prompt_plan = {
+                "role": MessageRole.SYSTEM,
+                "content": PROMPTS_FOR_INITIAL_PLAN[self.plan_type]["system"],
+            }
            message_user_prompt_plan = {
                "role": MessageRole.USER,
-                "content": USER_PROMPT_PLAN.format(
+                "content": PROMPTS_FOR_INITIAL_PLAN[self.plan_type]["user"].format(
                    task=task,
                    tool_descriptions=self._toolbox.show_tool_descriptions(self.tool_description_template),
                    answer_facts=answer_facts,
@@ -837,11 +842,11 @@ Now begin!""",
            # Redact updated plan
            plan_update_message = {
                "role": MessageRole.SYSTEM,
-                "content": SYSTEM_PROMPT_PLAN_UPDATE.format(task=task),
+                "content": PROMPTS_FOR_PLAN_UPDATE[self.plan_type]["system"].format(task=task),
            }
            plan_update_message_user = {
                "role": MessageRole.USER,
-                "content": USER_PROMPT_PLAN_UPDATE.format(
+                "content": PROMPTS_FOR_PLAN_UPDATE[self.plan_type]["user"].format(
                    task=task,
                    tool_descriptions=self._toolbox.show_tool_descriptions(self.tool_description_template),
                    facts_update=facts_update,

--- a/src/transformers/agents/prompts.py
+++ b/src/transformers/agents/prompts.py
@@ -471,6 +471,299 @@ After writing the final step of the plan, write the '\n<end_plan>' tag and stop

 Now write your new plan below."""

+SYSTEM_PROMPT_PLAN_STRUCTURED = """Output a step-by-step plan to solve the task using the given tools.
+This plan should involve individual tasks based on the avilable tools, that if executed correctly will yield the correct answer. Each step should be structured as follows:
+Step #n: {
+  "description": <description of what the step does and its output>
+  "tool": <tool to use>,
+  "params": {
+      <parameters to pass to the tool as a valid dict>
+  }
+  "output_var": <output variable name>
+}
+Each step must be necessary to reach the final answer. Steps should reuse outputs produced by earlier steps. The last step must be the final answer.
+
+Below are some examples:
+
+Example 1:
+------
+Inputs:
+---
+Task:
+How many encoder blocks were in the first attention-only ML architecture published?
+
+[FACTS LIST]:
+### 1. Facts given in the task
+- The paper first introduced an attention-only ML architecture.
+- The specific information required is the page number where the number of encoder blocks is stated.
+- No local files are provided for access.
+
+### 2. Facts to look up
+- The title and authors of the paper that first introduced an attention-only ML architecture.
+  - Source: Online search (e.g., Google Scholar, arXiv, or other academic databases)
+- The full text of the identified paper.
+  - Source: Online academic repositories (e.g., arXiv, journal websites)
+- The specific page number in the paper where the number of encoder blocks is mentioned.
+  - Source: The content of the identified paper
+
+### 3. Facts to derive
+- By identifying the correct paper and locating the specific page, we will derive the page number where the number of encoder blocks is stated.
+  - Logical steps: Identify the correct paper, access its content, search for the term "encoder blocks," and note the page number where this information is found.
+```
+
+[STEP 1 TOOL CALL]: {'tool_name': 'code interpreter', 'tool_arguments': '# Step 1: Identify the title and authors of the paper that first introduced an attention-only ML architecture.\nanswer = ask_search_agent(query="Can you find the title and authors of the paper that first introduced an attention-only machine learning architecture? Please provide the full citation.")\nprint(answer)'}
+[OUTPUT OF STEP 1] Observation: **Title**: Attention Is All You Need
+**Authors**: Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, Illia Polosukhin
+[STEP 2 TOOL CALL]: {'tool_name': 'code interpreter', 'tool_arguments': '# Step 1: Find the full text of the identified paper on arXiv\\npaper_url = "https://arxiv.org/pdf/1706.03762.pdf"\\nprint(paper_url)'}
+[OUTPUT OF STEP 2] Observation: https://arxiv.org/pdf/1706.03762.pdf
+---
+
+Output plan:
+---
+Step #1: {
+  "description": "Open the PDF of the paper from the provided URL and search within the text of the paper for the  mention of "encoder blocks"",
+  "tool": "inspect_file_as_text",
+  "params": {
+    "file_path": "https://arxiv.org/pdf/1706.03762.pdf",
+    "question": "On which page is the number of encoder blocks mentioned?"
+  },
+  "output_var": "page_number"
+}
+
+Step #2: {
+  "description": "Provide the final answer",
+  "tool": "final_answer",
+  "params": {
+      "answer": "{page_number}"
+  },
+  "output_var": ""
+}
+------
+
+Example 2:
+------
+Inputs:
+---
+Task:
+How many golf balls fits into a Boeing-747?
+
+[FACTS LIST]:
+### 1. Facts given in the task
+- The task requires calculating the number of golf balls that fir into a Boeing-747
+### 2. Facts to look up
+- The volume of a golf ball
+- The volume of a Boeing-747
+### 3. Facts to derive
+- Once the volumes are known the final answer can be calculated
+---
+Output plan:
+---
+Step #1: {
+  "description": "Find the volume of a Boeing-747",
+  "tool": "web_search",
+  "params": {
+      "query": "What is the internal volume of a Boeing-747 in cubic meters?"
+  },
+  "output_var": "boeing_volume"
+}
+
+Step #2: {
+  "description": "Find the volume of a standard golf ball",
+  "tool": "ask_search_agent",
+  "params": {
+      "query": "What is the volume of a standard golf ball in cubic centimeters?"
+  },
+  "output_var": "golf_ball_volume"
+}
+
+Step #3: {
+  "description": "Convert the volume of a golf ball from cubic centimeters to cubic meters. Calculate the number of golf balls that fit into the Boeing-747 by dividing the internal volume of the Boeing-747 by the volume of a golf ball.",
+  "tool": "python_code",
+  "params": {
+      "code": "golf_ball_volume_m3 = golf_ball_volume / 1e6\nnumber_of_golf_balls = boeing_volume / golf_ball_volume_m3"
+  },
+  "output_var": "number_of_golf_balls"
+}
+
+Step #4: {
+  "description": "Provide the final answer",
+  "tool": "final_answer",
+  "params": {
+      "answer": "{number_of_golf_balls}"
+  },
+  "output_var": ""
+}
+------
+Above example were using tools that might not exist for you.
+Your goal is to create a plan to solve the task."""
+
+USER_PROMPT_PLAN_STRUCTURED = """
+Here are your inputs:
+
+Task:
+```
+{task}
+```
+
+Your plan can leverage any of these tools:
+{tool_descriptions}
+These tools are Python functions which you can call with code. You also have access to a Python interpreter so you can run Python code.
+
+List of facts that you know:
+```
+{answer_facts}
+```
+
+Now for the given task, create a plan taking into account the list of facts.
+After writing the final step of the plan, write the '\n<end_plan>' tag and stop there. Output the plan only and nothing else."""
+
+SYSTEM_PROMPT_PLAN_UPDATE_STRUCTURED = """Output a step-by-step plan to solve the task using the given tools.
+This plan should involve individual tasks based on the avilable tools, that if executed correctly will yield the correct answer. Each step should be structured as follows:
+Step #n: {{
+  "description": <description of what the step does and its output>
+  "tool": <tool to use>,
+  "params": {{
+      <parameters to pass to the tool as a valid dict>
+  }}
+  "output_var": <output variable name>
+}}
+Each step must be necessary to reach the final answer. Steps should reuse outputs produced by earlier steps. The last step must be the final answer.
+
+Below are some examples:
+
+Example 1:
+------
+Inputs:
+---
+Task:
+How many encoder blocks were in the first attention-only ML architecture published?
+
+[FACTS LIST]:
+### 1. Facts given in the task
+- The paper first introduced an attention-only ML architecture.
+- The specific information required is the page number where the number of encoder blocks is stated.
+- No local files are provided for access.
+
+### 2. Facts to look up
+- The title and authors of the paper that first introduced an attention-only ML architecture.
+  - Source: Online search (e.g., Google Scholar, arXiv, or other academic databases)
+- The full text of the identified paper.
+  - Source: Online academic repositories (e.g., arXiv, journal websites)
+- The specific page number in the paper where the number of encoder blocks is mentioned.
+  - Source: The content of the identified paper
+
+### 3. Facts to derive
+- By identifying the correct paper and locating the specific page, we will derive the page number where the number of encoder blocks is stated.
+  - Logical steps: Identify the correct paper, access its content, search for the term "encoder blocks," and note the page number where this information is found.
+```
+
+[STEP 1 TOOL CALL]: {{'tool_name': 'code interpreter', 'tool_arguments': '# Step 1: Identify the title and authors of the paper that first introduced an attention-only ML architecture.\nanswer = ask_search_agent(query="Can you find the title and authors of the paper that first introduced an attention-only machine learning architecture? Please provide the full citation.")\nprint(answer)'}}
+[OUTPUT OF STEP 1] Observation: **Title**: Attention Is All You Need
+**Authors**: Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, Illia Polosukhin
+[STEP 2 TOOL CALL]: {{'tool_name': 'code interpreter', 'tool_arguments': '# Step 1: Find the full text of the identified paper on arXiv\\npaper_url = "https://arxiv.org/pdf/1706.03762.pdf"\\nprint(paper_url)'}}
+[OUTPUT OF STEP 2] Observation: https://arxiv.org/pdf/1706.03762.pdf
+---
+
+Output plan:
+---
+Step #1: {{
+  "description": "Open the PDF of the paper from the provided URL and search within the text of the paper for the  mention of "encoder blocks"",
+  "tool": "inspect_file_as_text",
+  "params": {{
+    "file_path": "https://arxiv.org/pdf/1706.03762.pdf",
+    "question": "On which page is the number of encoder blocks mentioned?"
+  }},
+  "output_var": "page_number"
+}}
+
+Step #2: {{
+  "description": "Provide the final answer",
+  "tool": "final_answer",
+  "params": {{
+      "answer": "{{page_number}}"
+  }},
+  "output_var": ""
+}}
+------
+
+Example 2:
+------
+Inputs:
+---
+Task:
+How many golf balls fits into a Boeing-747?
+
+[FACTS LIST]:
+### 1. Facts given in the task
+- The task requires calculating the number of golf balls that fir into a Boeing-747
+### 2. Facts to look up
+- The volume of a golf ball
+- The volume of a Boeing-747
+### 3. Facts to derive
+- Once the volumes are known the final answer can be calculated
+---
+Output plan:
+---
+Step #1: {{
+  "description": "Find the volume of a Boeing-747",
+  "tool": "web_search",
+  "params": {{
+      "query": "What is the internal volume of a Boeing-747 in cubic meters?"
+  }},
+  "output_var": "boeing_volume"
+}}
+
+Step #2: {{
+  "description": "Find the volume of a standard golf ball",
+  "tool": "ask_search_agent",
+  "params": {{
+      "query": "What is the volume of a standard golf ball in cubic centimeters?"
+  }},
+  "output_var": "golf_ball_volume"
+}}
+
+Step #3: {{
+  "description": "Convert the volume of a golf ball from cubic centimeters to cubic meters. Calculate the number of golf balls that fit into the Boeing-747 by dividing the internal volume of the Boeing-747 by the volume of a golf ball.",
+  "tool": "python_code",
+  "params": {{
+      "code": "golf_ball_volume_m3 = golf_ball_volume / 1e6\nnumber_of_golf_balls = boeing_volume / golf_ball_volume_m3"
+  }},
+  "output_var": "number_of_golf_balls"
+}}
+
+Step #4: {{
+  "description": "Provide the final answer",
+  "tool": "final_answer",
+  "params": {{
+      "answer": "{{number_of_golf_balls}}"
+  }},
+  "output_var": ""
+}}
+------
+Above example were using tools that might not exist for you.
+Find below the record of what has been tried so far to solve it. Your goal is to create an updated plan to solve the task."""
+
+USER_PROMPT_PLAN_UPDATE_STRUCTURED = """
+Here are your inputs:
+
+Task:
+```
+{task}
+```
+
+Your plan can leverage any of these tools:
+{tool_descriptions}
+These tools are Python functions which you can call with code. You also have access to a Python interpreter so you can run Python code.
+
+List of facts that you know:
+```
+{facts_update}
+```
+
+Now for the given task, create a plan taking into account the above inputs and list of facts.
+Beware that you have {remaining_steps} steps remaining.
+After writing the final step of the plan, write the '\n<end_plan>' tag and stop there. Output the plan only and nothing else."""
+
 PLAN_UPDATE_FINAL_PLAN_REDACTION = """I still need to solve the task I was given:
 ```
 {task}
@@ -480,3 +773,15 @@ Here is my new/updated plan of action to solve the task:
 ```
 {plan_update}
 ```"""
+
+SUPPORTED_PLAN_TYPES = ["default", "structured"]
+
+PROMPTS_FOR_INITIAL_PLAN = {
+    "default": {"system": SYSTEM_PROMPT_PLAN, "user": USER_PROMPT_PLAN},
+    "structured": {"system": SYSTEM_PROMPT_PLAN_STRUCTURED, "user": USER_PROMPT_PLAN_STRUCTURED},
+}
+
+PROMPTS_FOR_PLAN_UPDATE = {
+    "default": {"system": SYSTEM_PROMPT_PLAN_UPDATE, "user": USER_PROMPT_PLAN_UPDATE},
+    "structured": {"system": SYSTEM_PROMPT_PLAN_UPDATE_STRUCTURED, "user": USER_PROMPT_PLAN_UPDATE_STRUCTURED},
+}