openmanus

d344a08c · mashun1 · d344a08c · d344a08c · d344a08c · d344a08c
Commit d344a08c authored Mar 12, 2025 by mashun1
20 changed files
--- a/.gitattributes
+++ b/.gitattributes
+# HTML code is incorrectly calculated into statistics, so ignore them
+*.html linguist-detectable=false
+# Auto detect text files and perform LF normalization
+* text=auto eol=lf
+# Ensure shell scripts use LF (Linux style) line endings on Windows
+*.sh text eol=lf
+# Treat specific binary files as binary and prevent line ending conversion
+*.png binary
+*.jpg binary
+*.gif binary
+*.ico binary
+*.jpeg binary
+*.mp3 binary
+*.zip binary
+*.bin binary
+# Preserve original line endings for specific document files
+*.doc text eol=crlf
+*.docx text eol=crlf
+*.pdf binary
+# Ensure source code and script files use LF line endings
+*.py text eol=lf
+*.js text eol=lf
+*.html text eol=lf
+*.css text eol=lf
+# Specify custom diff driver for specific file types
+*.md diff=markdown
+*.json diff=json
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+*.mov filter=lfs diff=lfs merge=lfs -text
+*.webm filter=lfs diff=lfs merge=lfs -text
--- a/.gitignore
+++ b/.gitignore
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# video
+*.mp4
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+# PyPI configuration file
+.pypirc
+# Logs
+logs/
+# Data
+data/
+# Workspace
+workspace/
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
+repos:
+  - repo: https://github.com/psf/black
+    rev: 23.1.0
+    hooks:
+      - id: black
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
+  - repo: https://github.com/PyCQA/autoflake
+    rev: v2.0.1
+    hooks:
+      - id: autoflake
+        args: [
+          --remove-all-unused-imports,
+          --ignore-init-module-imports,  # 忽略 __init__.py 中的导入
+          --expand-star-imports,
+          --remove-duplicate-keys,
+          --remove-unused-variables,
+          --recursive,
+          --in-place,
+          --exclude=__init__.py,  # 排除 __init__.py 文件
+        ]
+        files: \.py$  # 只处理 Python 文件
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        args: [
+          "--profile", "black",  # 使用 black 兼容的配置
+          "--filter-files",
+          "--lines-after-imports=2",
+        ]
--- a/LICENSE
+++ b/LICENSE
+MIT License
+Copyright (c) 2025 manna_and_poem
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README.md
+++ b/README.md
+# <div align="center"><strong>OpenManus</strong></div>
+## 简介
+OpenManus是由MetaGPT团队推出的开源项目，旨在复刻Manus的核心功能。基于ReAct模式运行，可调用多种工具。
+## 安装
+组件支持
+ Python 3.11
+### 使用Docker进行安装
+```bash
+docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-py3.11-dtk24.04.3-ubuntu20.04
+docker run --shm-size 50g --network=host --name=openmanus --privileged --device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v 项目地址(绝对路径):/home/ -v /opt/hyhal:/opt/hyhal:ro -it <your IMAGE ID> bash
+pip install -r requirements.txt
+playwright install
+playwright install-deps
+```
+## 模型
+本项目支持多种模型，要求模型具备tool_call能力。在`config/config.toml`中修改相应配置即可。
+### 本地模型源
+- [ollama](https://developer.sourcefind.cn/codes/OpenDAS/ollama/-/tree/0.5.7)
+- [vllm](https://das.sourcefind.cn:55011/portal/#/installation?id=9385225c-6b7a-11ef-bf92-005056904552&type=frame)
+注意：`api_key`部分不可为空，即使使用随机字符，如`api_key="aa"`。
+### 其他模型源
+- [aliyun](https://help.aliyun.com/zh/model-studio/developer-reference/use-qwen-by-calling-api)
+- [openai](https://platform.openai.com/docs/overview)
+## 验证
+```bash
+python main.py
+```
+## 参考资料
+* https://github.com/mannaandpoem/OpenManus
+## Release Notes
+- 当前使用Bing搜索，搜索结果较google搜索较差。
\ No newline at end of file
--- a/README_official.md
+++ b/README_official.md
+# OpenManus 🙋
+Manus is incredible, but OpenManus can achieve any ideas without an Invite Code 🛫!
+Our team members @mannaandpoem @XiangJinyu @MoshiQAQ @didiforgithub from @MetaGPT built it within 3 hours!
+It's a simple implementation, so we welcome any suggestions, contributions, and feedback!
+Enjoy your own agent with OpenManus!
+## Project Demo
+[Demo Video](https://github.com/mannaandpoem/OpenManus/blob/main/demo/seo_website.mp4)
+## Installation
+1. Create a new conda environment:
+```bash
+conda create -n open_manus python=3.12
+conda activate open_manus
+```
+2. Clone the repository:
+```bash
+git clone https://github.com/mannaandpoem/OpenManus.git
+cd OpenManus
+```
+3. Install dependencies:
+```bash
+pip install -r requirements.txt
+```
+## Configuration
+OpenManus requires configuration for the LLM APIs it uses. Follow these steps to set up your configuration:
+1. Create a `config.toml` file in the `config` directory (you can copy from the example):
+```bash
+cp config/config.example.toml config/config.toml
+```
+2. Edit `config/config.toml` to add your API keys and customize settings:
+```toml
+# Global LLM configuration
+[llm]
+model = "gpt-4o"
+base_url = "https://api.openai.com/v1"
+api_key = "sk-..."  # Replace with your actual API key
+max_tokens = 4096
+temperature = 0.0
+# Optional configuration for specific LLM models
+[llm.vision]
+model = "gpt-4o"
+base_url = "https://api.openai.com/v1"
+api_key = "sk-..."  # Replace with your actual API key
+```
+## Quick Start
+One line for run OpenManus:
+```bash
+python main.py
+```
+Then input your idea via terminal!
+For unstable version, you also can run:
+```bash
+python run_flow.py
+```
+## How to contribute
+We welcome any friendly suggestions and helpful contributions! Just create issues or submit pull requests.
+Or contact @mannaandpoem via 📧email: mannaandpoem@gmail.com
+## Roadmap
+- [ ] Better Planning
+- [ ] Live Demos
+- [ ] Replay
+- [ ] RL Fine-tuned Models
+- [ ] Comprehensive Benchmarks
+## Acknowledgement
+Thanks to [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo) and [broswer-use](https://github.com/browser-use/browser-use) for providing basic support for this project!
+OpenManus is built by contributors from MetaGPT. Huge thanks to this agent community!
--- a/app/__init__.py
+++ b/app/__init__.py
--- a/app/agent/__init__.py
+++ b/app/agent/__init__.py
+from app.agent.base import BaseAgent
+from app.agent.planning import PlanningAgent
+from app.agent.react import ReActAgent
+from app.agent.swe import SWEAgent
+from app.agent.toolcall import ToolCallAgent
+__all__ = [
+    "BaseAgent",
+    "PlanningAgent",
+    "ReActAgent",
+    "SWEAgent",
+    "ToolCallAgent",
+]
--- a/app/agent/base.py
+++ b/app/agent/base.py
+from abc import ABC, abstractmethod
+from contextlib import asynccontextmanager
+from typing import List, Literal, Optional
+from pydantic import BaseModel, Field, model_validator
+from app.llm import LLM
+from app.logger import logger
+from app.schema import AgentState, Memory, Message
+class BaseAgent(BaseModel, ABC):
+    """Abstract base class for managing agent state and execution.
+    Provides foundational functionality for state transitions, memory management,
+    and a step-based execution loop. Subclasses must implement the `step` method.
+    """
+    # Core attributes
+    name: str = Field(..., description="Unique name of the agent")
+    description: Optional[str] = Field(None, description="Optional agent description")
+    # Prompts
+    system_prompt: Optional[str] = Field(
+        None, description="System-level instruction prompt"
+    )
+    next_step_prompt: Optional[str] = Field(
+        None, description="Prompt for determining next action"
+    )
+    # Dependencies
+    llm: LLM = Field(default_factory=LLM, description="Language model instance")
+    memory: Memory = Field(default_factory=Memory, description="Agent's memory store")
+    state: AgentState = Field(
+        default=AgentState.IDLE, description="Current agent state"
+    )
+    # Execution control
+    max_steps: int = Field(default=10, description="Maximum steps before termination")
+    current_step: int = Field(default=0, description="Current step in execution")
+    duplicate_threshold: int = 2
+    class Config:
+        arbitrary_types_allowed = True
+        extra = "allow"  # Allow extra fields for flexibility in subclasses
+    @model_validator(mode="after")
+    def initialize_agent(self) -> "BaseAgent":
+        """Initialize agent with default settings if not provided."""
+        if self.llm is None or not isinstance(self.llm, LLM):
+            self.llm = LLM(config_name=self.name.lower())
+        if not isinstance(self.memory, Memory):
+            self.memory = Memory()
+        return self
+    @asynccontextmanager
+    async def state_context(self, new_state: AgentState):
+        """Context manager for safe agent state transitions.
+        Args:
+            new_state: The state to transition to during the context.
+        Yields:
+            None: Allows execution within the new state.
+        Raises:
+            ValueError: If the new_state is invalid.
+        """
+        if not isinstance(new_state, AgentState):
+            raise ValueError(f"Invalid state: {new_state}")
+        previous_state = self.state
+        self.state = new_state
+        try:
+            yield
+        except Exception as e:
+            self.state = AgentState.ERROR  # Transition to ERROR on failure
+            raise e
+        finally:
+            self.state = previous_state  # Revert to previous state
+    def update_memory(
+        self,
+        role: Literal["user", "system", "assistant", "tool"],
+        content: str,
+        **kwargs,
+    ) -> None:
+        """Add a message to the agent's memory.
+        Args:
+            role: The role of the message sender (user, system, assistant, tool).
+            content: The message content.
+            **kwargs: Additional arguments (e.g., tool_call_id for tool messages).
+        Raises:
+            ValueError: If the role is unsupported.
+        """
+        message_map = {
+            "user": Message.user_message,
+            "system": Message.system_message,
+            "assistant": Message.assistant_message,
+            "tool": lambda content, **kw: Message.tool_message(content, **kw),
+        }
+        if role not in message_map:
+            raise ValueError(f"Unsupported message role: {role}")
+        msg_factory = message_map[role]
+        msg = msg_factory(content, **kwargs) if role == "tool" else msg_factory(content)
+        self.memory.add_message(msg)
+    async def run(self, request: Optional[str] = None) -> str:
+        """Execute the agent's main loop asynchronously.
+        Args:
+            request: Optional initial user request to process.
+        Returns:
+            A string summarizing the execution results.
+        Raises:
+            RuntimeError: If the agent is not in IDLE state at start.
+        """
+        if self.state != AgentState.IDLE:
+            raise RuntimeError(f"Cannot run agent from state: {self.state}")
+        if request:
+            self.update_memory("user", request)
+        results: List[str] = []
+        async with self.state_context(AgentState.RUNNING):
+            while (
+                self.current_step < self.max_steps and self.state != AgentState.FINISHED
+            ):
+                self.current_step += 1
+                logger.info(f"Executing step {self.current_step}/{self.max_steps}")
+                step_result = await self.step()
+                # Check for stuck state
+                if self.is_stuck():
+                    self.handle_stuck_state()
+                results.append(f"Step {self.current_step}: {step_result}")
+            if self.current_step >= self.max_steps:
+                results.append(f"Terminated: Reached max steps ({self.max_steps})")
+        return "\n".join(results) if results else "No steps executed"
+    @abstractmethod
+    async def step(self) -> str:
+        """Execute a single step in the agent's workflow.
+        Must be implemented by subclasses to define specific behavior.
+        """
+    def handle_stuck_state(self):
+        """Handle stuck state by adding a prompt to change strategy"""
+        stuck_prompt = "\
+        Observed duplicate responses. Consider new strategies and avoid repeating ineffective paths already attempted."
+        self.next_step_prompt = f"{stuck_prompt}\n{self.next_step_prompt}"
+        logger.warning(f"Agent detected stuck state. Added prompt: {stuck_prompt}")
+    def is_stuck(self) -> bool:
+        """Check if the agent is stuck in a loop by detecting duplicate content"""
+        if len(self.memory.messages) < 2:
+            return False
+        last_message = self.memory.messages[-1]
+        if not last_message.content:
+            return False
+        # Count identical content occurrences
+        duplicate_count = sum(
+            1
+            for msg in reversed(self.memory.messages[:-1])
+            if msg.role == "assistant" and msg.content == last_message.content
+        )
+        return duplicate_count >= self.duplicate_threshold
+    @property
+    def messages(self) -> List[Message]:
+        """Retrieve a list of messages from the agent's memory."""
+        return self.memory.messages
+    @messages.setter
+    def messages(self, value: List[Message]):
+        """Set the list of messages in the agent's memory."""
+        self.memory.messages = value
--- a/app/agent/manus.py
+++ b/app/agent/manus.py
+from pydantic import Field
+from app.agent.toolcall import ToolCallAgent
+from app.prompt.manus import NEXT_STEP_PROMPT, SYSTEM_PROMPT
+from app.tool import Terminate, ToolCollection
+from app.tool.browser_use_tool import BrowserUseTool
+from app.tool.file_saver import FileSaver
+# from app.tool.google_search import GoogleSearch
+# from app.tool.baidu_search import BaiduSearch
+from app.tool.bing_search import BingSearch
+from app.tool.python_execute import PythonExecute
+class Manus(ToolCallAgent):
+    """
+    A versatile general-purpose agent that uses planning to solve various tasks.
+    This agent extends PlanningAgent with a comprehensive set of tools and capabilities,
+    including Python execution, web browsing, file operations, and information retrieval
+    to handle a wide range of user requests.
+    """
+    name: str = "Manus"
+    description: str = (
+        "A versatile agent that can solve various tasks using multiple tools"
+    )
+    system_prompt: str = SYSTEM_PROMPT
+    next_step_prompt: str = NEXT_STEP_PROMPT
+    # Add general-purpose tools to the tool collection
+    # available_tools: ToolCollection = Field(
+    #    default_factory=lambda: ToolCollection(
+    #        PythonExecute(), GoogleSearch(), BrowserUseTool(), FileSaver(), Terminate()
+    #    )
+    #)
+    available_tools: ToolCollection = Field(
+        default_factory=lambda: ToolCollection(
+            PythonExecute(), BingSearch(), BrowserUseTool(), FileSaver(), Terminate()
+        )
+    )
--- a/app/agent/planning.py
+++ b/app/agent/planning.py
+import time
+from typing import Dict, List, Literal, Optional
+from pydantic import Field, model_validator
+from app.agent.toolcall import ToolCallAgent
+from app.logger import logger
+from app.prompt.planning import NEXT_STEP_PROMPT, PLANNING_SYSTEM_PROMPT
+from app.schema import Message, ToolCall
+from app.tool import PlanningTool, Terminate, ToolCollection
+class PlanningAgent(ToolCallAgent):
+    """
+    An agent that creates and manages plans to solve tasks.
+    This agent uses a planning tool to create and manage structured plans,
+    and tracks progress through individual steps until task completion.
+    """
+    name: str = "planning"
+    description: str = "An agent that creates and manages plans to solve tasks"
+    system_prompt: str = PLANNING_SYSTEM_PROMPT
+    next_step_prompt: str = NEXT_STEP_PROMPT
+    available_tools: ToolCollection = Field(
+        default_factory=lambda: ToolCollection(PlanningTool(), Terminate())
+    )
+    tool_choices: Literal["none", "auto", "required"] = "auto"
+    special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name])
+    tool_calls: List[ToolCall] = Field(default_factory=list)
+    active_plan_id: Optional[str] = Field(default=None)
+    # Add a dictionary to track the step status for each tool call
+    step_execution_tracker: Dict[str, Dict] = Field(default_factory=dict)
+    current_step_index: Optional[int] = None
+    max_steps: int = 20
+    @model_validator(mode="after")
+    def initialize_plan_and_verify_tools(self) -> "PlanningAgent":
+        """Initialize the agent with a default plan ID and validate required tools."""
+        self.active_plan_id = f"plan_{int(time.time())}"
+        if "planning" not in self.available_tools.tool_map:
+            self.available_tools.add_tool(PlanningTool())
+        return self
+    async def think(self) -> bool:
+        """Decide the next action based on plan status."""
+        prompt = (
+            f"CURRENT PLAN STATUS:\n{await self.get_plan()}\n\n{self.next_step_prompt}"
+            if self.active_plan_id
+            else self.next_step_prompt
+        )
+        self.messages.append(Message.user_message(prompt))
+        # Get the current step index before thinking
+        self.current_step_index = await self._get_current_step_index()
+        result = await super().think()
+        # After thinking, if we decided to execute a tool and it's not a planning tool or special tool,
+        # associate it with the current step for tracking
+        if result and self.tool_calls:
+            latest_tool_call = self.tool_calls[0]  # Get the most recent tool call
+            if (
+                latest_tool_call.function.name != "planning"
+                and latest_tool_call.function.name not in self.special_tool_names
+                and self.current_step_index is not None
+            ):
+                self.step_execution_tracker[latest_tool_call.id] = {
+                    "step_index": self.current_step_index,
+                    "tool_name": latest_tool_call.function.name,
+                    "status": "pending",  # Will be updated after execution
+                }
+        return result
+    async def act(self) -> str:
+        """Execute a step and track its completion status."""
+        result = await super().act()
+        # After executing the tool, update the plan status
+        if self.tool_calls:
+            latest_tool_call = self.tool_calls[0]
+            # Update the execution status to completed
+            if latest_tool_call.id in self.step_execution_tracker:
+                self.step_execution_tracker[latest_tool_call.id]["status"] = "completed"
+                self.step_execution_tracker[latest_tool_call.id]["result"] = result
+                # Update the plan status if this was a non-planning, non-special tool
+                if (
+                    latest_tool_call.function.name != "planning"
+                    and latest_tool_call.function.name not in self.special_tool_names
+                ):
+                    await self.update_plan_status(latest_tool_call.id)
+        return result
+    async def get_plan(self) -> str:
+        """Retrieve the current plan status."""
+        if not self.active_plan_id:
+            return "No active plan. Please create a plan first."
+        result = await self.available_tools.execute(
+            name="planning",
+            tool_input={"command": "get", "plan_id": self.active_plan_id},
+        )
+        return result.output if hasattr(result, "output") else str(result)
+    async def run(self, request: Optional[str] = None) -> str:
+        """Run the agent with an optional initial request."""
+        if request:
+            await self.create_initial_plan(request)
+        return await super().run()
+    async def update_plan_status(self, tool_call_id: str) -> None:
+        """
+        Update the current plan progress based on completed tool execution.
+        Only marks a step as completed if the associated tool has been successfully executed.
+        """
+        if not self.active_plan_id:
+            return
+        if tool_call_id not in self.step_execution_tracker:
+            logger.warning(f"No step tracking found for tool call {tool_call_id}")
+            return
+        tracker = self.step_execution_tracker[tool_call_id]
+        if tracker["status"] != "completed":
+            logger.warning(f"Tool call {tool_call_id} has not completed successfully")
+            return
+        step_index = tracker["step_index"]
+        try:
+            # Mark the step as completed
+            await self.available_tools.execute(
+                name="planning",
+                tool_input={
+                    "command": "mark_step",
+                    "plan_id": self.active_plan_id,
+                    "step_index": step_index,
+                    "step_status": "completed",
+                },
+            )
+            logger.info(
+                f"Marked step {step_index} as completed in plan {self.active_plan_id}"
+            )
+        except Exception as e:
+            logger.warning(f"Failed to update plan status: {e}")
+    async def _get_current_step_index(self) -> Optional[int]:
+        """
+        Parse the current plan to identify the first non-completed step's index.
+        Returns None if no active step is found.
+        """
+        if not self.active_plan_id:
+            return None
+        plan = await self.get_plan()
+        try:
+            plan_lines = plan.splitlines()
+            steps_index = -1
+            # Find the index of the "Steps:" line
+            for i, line in enumerate(plan_lines):
+                if line.strip() == "Steps:":
+                    steps_index = i
+                    break
+            if steps_index == -1:
+                return None
+            # Find the first non-completed step
+            for i, line in enumerate(plan_lines[steps_index + 1 :], start=0):
+                if "[ ]" in line or "[→]" in line:  # not_started or in_progress
+                    # Mark current step as in_progress
+                    await self.available_tools.execute(
+                        name="planning",
+                        tool_input={
+                            "command": "mark_step",
+                            "plan_id": self.active_plan_id,
+                            "step_index": i,
+                            "step_status": "in_progress",
+                        },
+                    )
+                    return i
+            return None  # No active step found
+        except Exception as e:
+            logger.warning(f"Error finding current step index: {e}")
+            return None
+    async def create_initial_plan(self, request: str) -> None:
+        """Create an initial plan based on the request."""
+        logger.info(f"Creating initial plan with ID: {self.active_plan_id}")
+        messages = [
+            Message.user_message(
+                f"Analyze the request and create a plan with ID {self.active_plan_id}: {request}"
+            )
+        ]
+        self.memory.add_messages(messages)
+        response = await self.llm.ask_tool(
+            messages=messages,
+            system_msgs=[Message.system_message(self.system_prompt)],
+            tools=self.available_tools.to_params(),
+            tool_choice="required",
+        )
+        assistant_msg = Message.from_tool_calls(
+            content=response.content, tool_calls=response.tool_calls
+        )
+        self.memory.add_message(assistant_msg)
+        plan_created = False
+        for tool_call in response.tool_calls:
+            if tool_call.function.name == "planning":
+                result = await self.execute_tool(tool_call)
+                logger.info(
+                    f"Executed tool {tool_call.function.name} with result: {result}"
+                )
+                # Add tool response to memory
+                tool_msg = Message.tool_message(
+                    content=result,
+                    tool_call_id=tool_call.id,
+                    name=tool_call.function.name,
+                )
+                self.memory.add_message(tool_msg)
+                plan_created = True
+                break
+        if not plan_created:
+            logger.warning("No plan created from initial request")
+            tool_msg = Message.assistant_message(
+                "Error: Parameter `plan_id` is required for command: create"
+            )
+            self.memory.add_message(tool_msg)
+async def main():
+    # Configure and run the agent
+    agent = PlanningAgent(available_tools=ToolCollection(PlanningTool(), Terminate()))
+    result = await agent.run("Help me plan a trip to the moon")
+    print(result)
+if __name__ == "__main__":
+    import asyncio
+    asyncio.run(main())
--- a/app/agent/react.py
+++ b/app/agent/react.py
+from abc import ABC, abstractmethod
+from typing import Optional
+from pydantic import Field
+from app.agent.base import BaseAgent
+from app.llm import LLM
+from app.schema import AgentState, Memory
+class ReActAgent(BaseAgent, ABC):
+    name: str
+    description: Optional[str] = None
+    system_prompt: Optional[str] = None
+    next_step_prompt: Optional[str] = None
+    llm: Optional[LLM] = Field(default_factory=LLM)
+    memory: Memory = Field(default_factory=Memory)
+    state: AgentState = AgentState.IDLE
+    max_steps: int = 10
+    current_step: int = 0
+    @abstractmethod
+    async def think(self) -> bool:
+        """Process current state and decide next action"""
+    @abstractmethod
+    async def act(self) -> str:
+        """Execute decided actions"""
+    async def step(self) -> str:
+        """Execute a single step: think and act."""
+        should_act = await self.think()
+        if not should_act:
+            return "Thinking complete - no action needed"
+        return await self.act()
--- a/app/agent/swe.py
+++ b/app/agent/swe.py
+from typing import List
+from pydantic import Field
+from app.agent.toolcall import ToolCallAgent
+from app.prompt.swe import NEXT_STEP_TEMPLATE, SYSTEM_PROMPT
+from app.tool import Bash, StrReplaceEditor, Terminate, ToolCollection
+class SWEAgent(ToolCallAgent):
+    """An agent that implements the SWEAgent paradigm for executing code and natural conversations."""
+    name: str = "swe"
+    description: str = "an autonomous AI programmer that interacts directly with the computer to solve tasks."
+    system_prompt: str = SYSTEM_PROMPT
+    next_step_prompt: str = NEXT_STEP_TEMPLATE
+    available_tools: ToolCollection = ToolCollection(
+        Bash(), StrReplaceEditor(), Terminate()
+    )
+    special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name])
+    max_steps: int = 30
+    bash: Bash = Field(default_factory=Bash)
+    working_dir: str = "."
+    async def think(self) -> bool:
+        """Process current state and decide next action"""
+        # Update working directory
+        self.working_dir = await self.bash.execute("pwd")
+        self.next_step_prompt = self.next_step_prompt.format(
+            current_dir=self.working_dir
+        )
+        return await super().think()
--- a/app/agent/toolcall.py
+++ b/app/agent/toolcall.py
+import json
+from typing import Any, List, Literal
+from pydantic import Field
+from app.agent.react import ReActAgent
+from app.logger import logger
+from app.prompt.toolcall import NEXT_STEP_PROMPT, SYSTEM_PROMPT
+from app.schema import AgentState, Message, ToolCall
+from app.tool import CreateChatCompletion, Terminate, ToolCollection
+TOOL_CALL_REQUIRED = "Tool calls required but none provided"
+class ToolCallAgent(ReActAgent):
+    """Base agent class for handling tool/function calls with enhanced abstraction"""
+    name: str = "toolcall"
+    description: str = "an agent that can execute tool calls."
+    system_prompt: str = SYSTEM_PROMPT
+    next_step_prompt: str = NEXT_STEP_PROMPT
+    available_tools: ToolCollection = ToolCollection(
+        CreateChatCompletion(), Terminate()
+    )
+    tool_choices: Literal["none", "auto", "required"] = "auto"
+    special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name])
+    tool_calls: List[ToolCall] = Field(default_factory=list)
+    max_steps: int = 30
+    async def think(self) -> bool:
+        """Process current state and decide next actions using tools"""
+        if self.next_step_prompt:
+            user_msg = Message.user_message(self.next_step_prompt)
+            self.messages += [user_msg]
+        # Get response with tool options
+        response = await self.llm.ask_tool(
+            messages=self.messages,
+            system_msgs=[Message.system_message(self.system_prompt)]
+            if self.system_prompt
+            else None,
+            tools=self.available_tools.to_params(),
+            tool_choice=self.tool_choices,
+        )
+        self.tool_calls = response.tool_calls
+        # Log response info
+        logger.info(f"✨ {self.name}'s thoughts: {response.content}")
+        logger.info(
+            f"🛠️ {self.name} selected {len(response.tool_calls) if response.tool_calls else 0} tools to use"
+        )
+        if response.tool_calls:
+            logger.info(
+                f"🧰 Tools being prepared: {[call.function.name for call in response.tool_calls]}"
+            )
+        try:
+            # Handle different tool_choices modes
+            if self.tool_choices == "none":
+                if response.tool_calls:
+                    logger.warning(
+                        f"🤔 Hmm, {self.name} tried to use tools when they weren't available!"
+                    )
+                if response.content:
+                    self.memory.add_message(Message.assistant_message(response.content))
+                    return True
+                return False
+            # Create and add assistant message
+            assistant_msg = (
+                Message.from_tool_calls(
+                    content=response.content, tool_calls=self.tool_calls
+                )
+                if self.tool_calls
+                else Message.assistant_message(response.content)
+            )
+            self.memory.add_message(assistant_msg)
+            if self.tool_choices == "required" and not self.tool_calls:
+                return True  # Will be handled in act()
+            # For 'auto' mode, continue with content if no commands but content exists
+            if self.tool_choices == "auto" and not self.tool_calls:
+                return bool(response.content)
+            return bool(self.tool_calls)
+        except Exception as e:
+            logger.error(f"🚨 Oops! The {self.name}'s thinking process hit a snag: {e}")
+            self.memory.add_message(
+                Message.assistant_message(
+                    f"Error encountered while processing: {str(e)}"
+                )
+            )
+            return False
+    async def act(self) -> str:
+        """Execute tool calls and handle their results"""
+        if not self.tool_calls:
+            if self.tool_choices == "required":
+                raise ValueError(TOOL_CALL_REQUIRED)
+            # Return last message content if no tool calls
+            return self.messages[-1].content or "No content or commands to execute"
+        results = []
+        for command in self.tool_calls:
+            result = await self.execute_tool(command)
+            logger.info(
+                f"🎯 Tool '{command.function.name}' completed its mission! Result: {result}"
+            )
+            # Add tool response to memory
+            tool_msg = Message.tool_message(
+                content=result, tool_call_id=command.id, name=command.function.name
+            )
+            self.memory.add_message(tool_msg)
+            results.append(result)
+        return "\n\n".join(results)
+    async def execute_tool(self, command: ToolCall) -> str:
+        """Execute a single tool call with robust error handling"""
+        if not command or not command.function or not command.function.name:
+            return "Error: Invalid command format"
+        name = command.function.name
+        if name not in self.available_tools.tool_map:
+            return f"Error: Unknown tool '{name}'"
+        try:
+            # Parse arguments
+            args = json.loads(command.function.arguments or "{}")
+            # Execute the tool
+            logger.info(f"🔧 Activating tool: '{name}'...")
+            result = await self.available_tools.execute(name=name, tool_input=args)
+            # Format result for display
+            observation = (
+                f"Observed output of cmd `{name}` executed:\n{str(result)}"
+                if result
+                else f"Cmd `{name}` completed with no output"
+            )
+            # Handle special tools like `finish`
+            await self._handle_special_tool(name=name, result=result)
+            return observation
+        except json.JSONDecodeError:
+            error_msg = f"Error parsing arguments for {name}: Invalid JSON format"
+            logger.error(
+                f"📝 Oops! The arguments for '{name}' don't make sense - invalid JSON"
+            )
+            return f"Error: {error_msg}"
+        except Exception as e:
+            error_msg = f"⚠️ Tool '{name}' encountered a problem: {str(e)}"
+            logger.error(error_msg)
+            return f"Error: {error_msg}"
+    async def _handle_special_tool(self, name: str, result: Any, **kwargs):
+        """Handle special tool execution and state changes"""
+        if not self._is_special_tool(name):
+            return
+        if self._should_finish_execution(name=name, result=result, **kwargs):
+            # Set agent state to finished
+            logger.info(f"🏁 Special tool '{name}' has completed the task!")
+            self.state = AgentState.FINISHED
+    @staticmethod
+    def _should_finish_execution(**kwargs) -> bool:
+        """Determine if tool execution should finish the agent"""
+        return True
+    def _is_special_tool(self, name: str) -> bool:
+        """Check if tool name is in special tools list"""
+        return name.lower() in [n.lower() for n in self.special_tool_names]
--- a/app/config.py
+++ b/app/config.py
+import threading
+import tomllib
+from pathlib import Path
+from typing import Dict
+from pydantic import BaseModel, Field
+def get_project_root() -> Path:
+    """Get the project root directory"""
+    return Path(__file__).resolve().parent.parent
+PROJECT_ROOT = get_project_root()
+WORKSPACE_ROOT = PROJECT_ROOT / "workspace"
+class LLMSettings(BaseModel):
+    model: str = Field(..., description="Model name")
+    base_url: str = Field(..., description="API base URL")
+    api_key: str = Field(..., description="API key")
+    max_tokens: int = Field(4096, description="Maximum number of tokens per request")
+    temperature: float = Field(1.0, description="Sampling temperature")
+class AppConfig(BaseModel):
+    llm: Dict[str, LLMSettings]
+class Config:
+    _instance = None
+    _lock = threading.Lock()
+    _initialized = False
+    def __new__(cls):
+        if cls._instance is None:
+            with cls._lock:
+                if cls._instance is None:
+                    cls._instance = super().__new__(cls)
+        return cls._instance
+    def __init__(self):
+        if not self._initialized:
+            with self._lock:
+                if not self._initialized:
+                    self._config = None
+                    self._load_initial_config()
+                    self._initialized = True
+    @staticmethod
+    def _get_config_path() -> Path:
+        root = PROJECT_ROOT
+        config_path = root / "config" / "config.toml"
+        if config_path.exists():
+            return config_path
+        example_path = root / "config" / "config.example.toml"
+        if example_path.exists():
+            return example_path
+        raise FileNotFoundError("No configuration file found in config directory")
+    def _load_config(self) -> dict:
+        config_path = self._get_config_path()
+        with config_path.open("rb") as f:
+            return tomllib.load(f)
+    def _load_initial_config(self):
+        raw_config = self._load_config()
+        base_llm = raw_config.get("llm", {})
+        llm_overrides = {
+            k: v for k, v in raw_config.get("llm", {}).items() if isinstance(v, dict)
+        }
+        default_settings = {
+            "model": base_llm.get("model"),
+            "base_url": base_llm.get("base_url"),
+            "api_key": base_llm.get("api_key"),
+            "max_tokens": base_llm.get("max_tokens", 4096),
+            "temperature": base_llm.get("temperature", 1.0),
+        }
+        config_dict = {
+            "llm": {
+                "default": default_settings,
+                **{
+                    name: {**default_settings, **override_config}
+                    for name, override_config in llm_overrides.items()
+                },
+            }
+        }
+        self._config = AppConfig(**config_dict)
+    @property
+    def llm(self) -> Dict[str, LLMSettings]:
+        return self._config.llm
+config = Config()
--- a/app/exceptions.py
+++ b/app/exceptions.py
+class ToolError(Exception):
+    """Raised when a tool encounters an error."""
+    def __init__(self, message):
+        self.message = message
--- a/app/flow/__init__.py
+++ b/app/flow/__init__.py
--- a/app/flow/base.py
+++ b/app/flow/base.py
+from abc import ABC, abstractmethod
+from enum import Enum
+from typing import Dict, List, Optional, Union
+from pydantic import BaseModel
+from app.agent.base import BaseAgent
+class FlowType(str, Enum):
+    PLANNING = "planning"
+class BaseFlow(BaseModel, ABC):
+    """Base class for execution flows supporting multiple agents"""
+    agents: Dict[str, BaseAgent]
+    tools: Optional[List] = None
+    primary_agent_key: Optional[str] = None
+    class Config:
+        arbitrary_types_allowed = True
+    def __init__(
+        self, agents: Union[BaseAgent, List[BaseAgent], Dict[str, BaseAgent]], **data
+    ):
+        # Handle different ways of providing agents
+        if isinstance(agents, BaseAgent):
+            agents_dict = {"default": agents}
+        elif isinstance(agents, list):
+            agents_dict = {f"agent_{i}": agent for i, agent in enumerate(agents)}
+        else:
+            agents_dict = agents
+        # If primary agent not specified, use first agent
+        primary_key = data.get("primary_agent_key")
+        if not primary_key and agents_dict:
+            primary_key = next(iter(agents_dict))
+            data["primary_agent_key"] = primary_key
+        # Set the agents dictionary
+        data["agents"] = agents_dict
+        # Initialize using BaseModel's init
+        super().__init__(**data)
+    @property
+    def primary_agent(self) -> Optional[BaseAgent]:
+        """Get the primary agent for the flow"""
+        return self.agents.get(self.primary_agent_key)
+    def get_agent(self, key: str) -> Optional[BaseAgent]:
+        """Get a specific agent by key"""
+        return self.agents.get(key)
+    def add_agent(self, key: str, agent: BaseAgent) -> None:
+        """Add a new agent to the flow"""
+        self.agents[key] = agent
+    @abstractmethod
+    async def execute(self, input_text: str) -> str:
+        """Execute the flow with given input"""
--- a/app/flow/flow_factory.py
+++ b/app/flow/flow_factory.py
+from typing import Dict, List, Union
+from app.agent.base import BaseAgent
+from app.flow.base import BaseFlow, FlowType
+from app.flow.planning import PlanningFlow
+class FlowFactory:
+    """Factory for creating different types of flows with support for multiple agents"""
+    @staticmethod
+    def create_flow(
+        flow_type: FlowType,
+        agents: Union[BaseAgent, List[BaseAgent], Dict[str, BaseAgent]],
+        **kwargs,
+    ) -> BaseFlow:
+        flows = {
+            FlowType.PLANNING: PlanningFlow,
+        }
+        flow_class = flows.get(flow_type)
+        if not flow_class:
+            raise ValueError(f"Unknown flow type: {flow_type}")
+        return flow_class(agents, **kwargs)
--- a/app/flow/planning.py
+++ b/app/flow/planning.py
+import json
+import time
+from typing import Dict, List, Optional, Union
+from pydantic import Field
+from app.agent.base import BaseAgent
+from app.flow.base import BaseFlow
+from app.llm import LLM
+from app.logger import logger
+from app.schema import AgentState, Message
+from app.tool import PlanningTool
+class PlanningFlow(BaseFlow):
+    """A flow that manages planning and execution of tasks using agents."""
+    llm: LLM = Field(default_factory=lambda: LLM())
+    planning_tool: PlanningTool = Field(default_factory=PlanningTool)
+    executor_keys: List[str] = Field(default_factory=list)
+    active_plan_id: str = Field(default_factory=lambda: f"plan_{int(time.time())}")
+    current_step_index: Optional[int] = None
+    def __init__(
+        self, agents: Union[BaseAgent, List[BaseAgent], Dict[str, BaseAgent]], **data
+    ):
+        # Set executor keys before super().__init__
+        if "executors" in data:
+            data["executor_keys"] = data.pop("executors")
+        # Set plan ID if provided
+        if "plan_id" in data:
+            data["active_plan_id"] = data.pop("plan_id")
+        # Initialize the planning tool if not provided
+        if "planning_tool" not in data:
+            planning_tool = PlanningTool()
+            data["planning_tool"] = planning_tool
+        # Call parent's init with the processed data
+        super().__init__(agents, **data)
+        # Set executor_keys to all agent keys if not specified
+        if not self.executor_keys:
+            self.executor_keys = list(self.agents.keys())
+    def get_executor(self, step_type: Optional[str] = None) -> BaseAgent:
+        """
+        Get an appropriate executor agent for the current step.
+        Can be extended to select agents based on step type/requirements.
+        """
+        # If step type is provided and matches an agent key, use that agent
+        if step_type and step_type in self.agents:
+            return self.agents[step_type]
+        # Otherwise use the first available executor or fall back to primary agent
+        for key in self.executor_keys:
+            if key in self.agents:
+                return self.agents[key]
+        # Fallback to primary agent
+        return self.primary_agent
+    async def execute(self, input_text: str) -> str:
+        """Execute the planning flow with agents."""
+        try:
+            if not self.primary_agent:
+                raise ValueError("No primary agent available")
+            # Create initial plan if input provided
+            if input_text:
+                await self._create_initial_plan(input_text)
+                # Verify plan was created successfully
+                if self.active_plan_id not in self.planning_tool.plans:
+                    logger.error(
+                        f"Plan creation failed. Plan ID {self.active_plan_id} not found in planning tool."
+                    )
+                    return f"Failed to create plan for: {input_text}"
+            result = ""
+            while True:
+                # Get current step to execute
+                self.current_step_index, step_info = await self._get_current_step_info()
+                # Exit if no more steps or plan completed
+                if self.current_step_index is None:
+                    result += await self._finalize_plan()
+                    break
+                # Execute current step with appropriate agent
+                step_type = step_info.get("type") if step_info else None
+                executor = self.get_executor(step_type)
+                step_result = await self._execute_step(executor, step_info)
+                result += step_result + "\n"
+                # Check if agent wants to terminate
+                if hasattr(executor, "state") and executor.state == AgentState.FINISHED:
+                    break
+            return result
+        except Exception as e:
+            logger.error(f"Error in PlanningFlow: {str(e)}")
+            return f"Execution failed: {str(e)}"
+    async def _create_initial_plan(self, request: str) -> None:
+        """Create an initial plan based on the request using the flow's LLM and PlanningTool."""
+        logger.info(f"Creating initial plan with ID: {self.active_plan_id}")
+        # Create a system message for plan creation
+        system_message = Message.system_message(
+            "You are a planning assistant. Your task is to create a detailed plan with clear steps."
+        )
+        # Create a user message with the request
+        user_message = Message.user_message(
+            f"Create a detailed plan to accomplish this task: {request}"
+        )
+        # Call LLM with PlanningTool
+        response = await self.llm.ask_tool(
+            messages=[user_message],
+            system_msgs=[system_message],
+            tools=[self.planning_tool.to_param()],
+            tool_choice="required",
+        )
+        # Process tool calls if present
+        if response.tool_calls:
+            for tool_call in response.tool_calls:
+                if tool_call.function.name == "planning":
+                    # Parse the arguments
+                    args = tool_call.function.arguments
+                    if isinstance(args, str):
+                        try:
+                            args = json.loads(args)
+                        except json.JSONDecodeError:
+                            logger.error(f"Failed to parse tool arguments: {args}")
+                            continue
+                    # Ensure plan_id is set correctly and execute the tool
+                    args["plan_id"] = self.active_plan_id
+                    # Execute the tool via ToolCollection instead of directly
+                    result = await self.planning_tool.execute(**args)
+                    logger.info(f"Plan creation result: {str(result)}")
+                    return
+        # If execution reached here, create a default plan
+        logger.warning("Creating default plan")
+        # Create default plan using the ToolCollection
+        await self.planning_tool.execute(
+            **{
+                "command": "create",
+                "plan_id": self.active_plan_id,
+                "title": f"Plan for: {request[:50]}{'...' if len(request) > 50 else ''}",
+                "steps": ["Analyze request", "Execute task", "Verify results"],
+            }
+        )
+    async def _get_current_step_info(self) -> tuple[Optional[int], Optional[dict]]:
+        """
+        Parse the current plan to identify the first non-completed step's index and info.
+        Returns (None, None) if no active step is found.
+        """
+        if (
+            not self.active_plan_id
+            or self.active_plan_id not in self.planning_tool.plans
+        ):
+            logger.error(f"Plan with ID {self.active_plan_id} not found")
+            return None, None
+        try:
+            # Direct access to plan data from planning tool storage
+            plan_data = self.planning_tool.plans[self.active_plan_id]
+            steps = plan_data.get("steps", [])
+            step_statuses = plan_data.get("step_statuses", [])
+            # Find first non-completed step
+            for i, step in enumerate(steps):
+                if i >= len(step_statuses):
+                    status = "not_started"
+                else:
+                    status = step_statuses[i]
+                if status in ["not_started", "in_progress"]:
+                    # Extract step type/category if available
+                    step_info = {"text": step}
+                    # Try to extract step type from the text (e.g., [SEARCH] or [CODE])
+                    import re
+                    type_match = re.search(r"\[([A-Z_]+)\]", step)
+                    if type_match:
+                        step_info["type"] = type_match.group(1).lower()
+                    # Mark current step as in_progress
+                    try:
+                        await self.planning_tool.execute(
+                            command="mark_step",
+                            plan_id=self.active_plan_id,
+                            step_index=i,
+                            step_status="in_progress",
+                        )
+                    except Exception as e:
+                        logger.warning(f"Error marking step as in_progress: {e}")
+                        # Update step status directly if needed
+                        if i < len(step_statuses):
+                            step_statuses[i] = "in_progress"
+                        else:
+                            while len(step_statuses) < i:
+                                step_statuses.append("not_started")
+                            step_statuses.append("in_progress")
+                        plan_data["step_statuses"] = step_statuses
+                    return i, step_info
+            return None, None  # No active step found
+        except Exception as e:
+            logger.warning(f"Error finding current step index: {e}")
+            return None, None
+    async def _execute_step(self, executor: BaseAgent, step_info: dict) -> str:
+        """Execute the current step with the specified agent using agent.run()."""
+        # Prepare context for the agent with current plan status
+        plan_status = await self._get_plan_text()
+        step_text = step_info.get("text", f"Step {self.current_step_index}")
+        # Create a prompt for the agent to execute the current step
+        step_prompt = f"""
+        CURRENT PLAN STATUS:
+        {plan_status}
+        YOUR CURRENT TASK:
+        You are now working on step {self.current_step_index}: "{step_text}"
+        Please execute this step using the appropriate tools. When you're done, provide a summary of what you accomplished.
+        """
+        # Use agent.run() to execute the step
+        try:
+            step_result = await executor.run(step_prompt)
+            # Mark the step as completed after successful execution
+            await self._mark_step_completed()
+            return step_result
+        except Exception as e:
+            logger.error(f"Error executing step {self.current_step_index}: {e}")
+            return f"Error executing step {self.current_step_index}: {str(e)}"
+    async def _mark_step_completed(self) -> None:
+        """Mark the current step as completed."""
+        if self.current_step_index is None:
+            return
+        try:
+            # Mark the step as completed
+            await self.planning_tool.execute(
+                command="mark_step",
+                plan_id=self.active_plan_id,
+                step_index=self.current_step_index,
+                step_status="completed",
+            )
+            logger.info(
+                f"Marked step {self.current_step_index} as completed in plan {self.active_plan_id}"
+            )
+        except Exception as e:
+            logger.warning(f"Failed to update plan status: {e}")
+            # Update step status directly in planning tool storage
+            if self.active_plan_id in self.planning_tool.plans:
+                plan_data = self.planning_tool.plans[self.active_plan_id]
+                step_statuses = plan_data.get("step_statuses", [])
+                # Ensure the step_statuses list is long enough
+                while len(step_statuses) <= self.current_step_index:
+                    step_statuses.append("not_started")
+                # Update the status
+                step_statuses[self.current_step_index] = "completed"
+                plan_data["step_statuses"] = step_statuses
+    async def _get_plan_text(self) -> str:
+        """Get the current plan as formatted text."""
+        try:
+            result = await self.planning_tool.execute(
+                command="get", plan_id=self.active_plan_id
+            )
+            return result.output if hasattr(result, "output") else str(result)
+        except Exception as e:
+            logger.error(f"Error getting plan: {e}")
+            return self._generate_plan_text_from_storage()
+    def _generate_plan_text_from_storage(self) -> str:
+        """Generate plan text directly from storage if the planning tool fails."""
+        try:
+            if self.active_plan_id not in self.planning_tool.plans:
+                return f"Error: Plan with ID {self.active_plan_id} not found"
+            plan_data = self.planning_tool.plans[self.active_plan_id]
+            title = plan_data.get("title", "Untitled Plan")
+            steps = plan_data.get("steps", [])
+            step_statuses = plan_data.get("step_statuses", [])
+            step_notes = plan_data.get("step_notes", [])
+            # Ensure step_statuses and step_notes match the number of steps
+            while len(step_statuses) < len(steps):
+                step_statuses.append("not_started")
+            while len(step_notes) < len(steps):
+                step_notes.append("")
+            # Count steps by status
+            status_counts = {
+                "completed": 0,
+                "in_progress": 0,
+                "blocked": 0,
+                "not_started": 0,
+            }
+            for status in step_statuses:
+                if status in status_counts:
+                    status_counts[status] += 1
+            completed = status_counts["completed"]
+            total = len(steps)
+            progress = (completed / total) * 100 if total > 0 else 0
+            plan_text = f"Plan: {title} (ID: {self.active_plan_id})\n"
+            plan_text += "=" * len(plan_text) + "\n\n"
+            plan_text += (
+                f"Progress: {completed}/{total} steps completed ({progress:.1f}%)\n"
+            )
+            plan_text += f"Status: {status_counts['completed']} completed, {status_counts['in_progress']} in progress, "
+            plan_text += f"{status_counts['blocked']} blocked, {status_counts['not_started']} not started\n\n"
+            plan_text += "Steps:\n"
+            for i, (step, status, notes) in enumerate(
+                zip(steps, step_statuses, step_notes)
+            ):
+                if status == "completed":
+                    status_mark = "[✓]"
+                elif status == "in_progress":
+                    status_mark = "[→]"
+                elif status == "blocked":
+                    status_mark = "[!]"
+                else:  # not_started
+                    status_mark = "[ ]"
+                plan_text += f"{i}. {status_mark} {step}\n"
+                if notes:
+                    plan_text += f"   Notes: {notes}\n"
+            return plan_text
+        except Exception as e:
+            logger.error(f"Error generating plan text from storage: {e}")
+            return f"Error: Unable to retrieve plan with ID {self.active_plan_id}"
+    async def _finalize_plan(self) -> str:
+        """Finalize the plan and provide a summary using the flow's LLM directly."""
+        plan_text = await self._get_plan_text()
+        # Create a summary using the flow's LLM directly
+        try:
+            system_message = Message.system_message(
+                "You are a planning assistant. Your task is to summarize the completed plan."
+            )
+            user_message = Message.user_message(
+                f"The plan has been completed. Here is the final plan status:\n\n{plan_text}\n\nPlease provide a summary of what was accomplished and any final thoughts."
+            )
+            response = await self.llm.ask(
+                messages=[user_message], system_msgs=[system_message]
+            )
+            return f"Plan completed:\n\n{response}"
+        except Exception as e:
+            logger.error(f"Error finalizing plan with LLM: {e}")
+            # Fallback to using an agent for the summary
+            try:
+                agent = self.primary_agent
+                summary_prompt = f"""
+                The plan has been completed. Here is the final plan status:
+                {plan_text}
+                Please provide a summary of what was accomplished and any final thoughts.
+                """
+                summary = await agent.run(summary_prompt)
+                return f"Plan completed:\n\n{summary}"
+            except Exception as e2:
+                logger.error(f"Error finalizing plan with agent: {e2}")
+                return "Plan completed. Error generating summary."