Unverified Commit 75d71305 authored by perf3ct

Merge remote-tracking branch 'upstream/main' into feature-external-db-reconnect

parents ad32a2ef 162643a4
......@@ -11,7 +11,7 @@
- [ ] **Dependencies:** Are there any new dependencies? Have you updated the dependency versions in the documentation?
- [ ] **Testing:** Have you written and run sufficient tests for validating the changes?
- [ ] **Code review:** Have you performed a self-review of your code, addressing any coding standard issues and ensuring adherence to the project's coding standards?
- [ ] **Label:** To clearly categorize this pull request, assign a relevant label to the pull request title, using one of the following:
- [ ] **Prefix:** To clearly categorize this pull request, prefix the pull request title, using one of the following:
- **BREAKING CHANGE**: Significant changes that may affect compatibility
- **build**: Changes that affect the build system or external dependencies
- **ci**: Changes to our continuous integration processes or workflows
......
......@@ -70,8 +70,10 @@ jobs:
images: ${{ env.FULL_IMAGE_NAME }}
tags: |
type=ref,event=branch
${{ github.ref_type == 'tag' && 'type=raw,value=main' || '' }}
flavor: |
prefix=cache-${{ matrix.platform }}-
latest=false
- name: Build Docker image (latest)
uses: docker/build-push-action@v5
......@@ -158,8 +160,10 @@ jobs:
images: ${{ env.FULL_IMAGE_NAME }}
tags: |
type=ref,event=branch
${{ github.ref_type == 'tag' && 'type=raw,value=main' || '' }}
flavor: |
prefix=cache-cuda-${{ matrix.platform }}-
latest=false
- name: Build Docker image (cuda)
uses: docker/build-push-action@v5
......@@ -247,8 +251,10 @@ jobs:
images: ${{ env.FULL_IMAGE_NAME }}
tags: |
type=ref,event=branch
${{ github.ref_type == 'tag' && 'type=raw,value=main' || '' }}
flavor: |
prefix=cache-ollama-${{ matrix.platform }}-
latest=false
- name: Build Docker image (ollama)
uses: docker/build-push-action@v5
......
......@@ -4,7 +4,6 @@ on:
push:
branches:
- main # or whatever branch you want to use
- dev
jobs:
release:
......
......@@ -5,6 +5,190 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.3.4] - 2024-06-12
### Fixed
- **🔒 Mixed Content with HTTPS Issue**: Resolved a problem where mixed content (HTTP and HTTPS) was causing security warnings and blocking resources on HTTPS sites.
- **🔍 Web Search Issue**: Addressed the problem where web search functionality was not working correctly. The 'ENABLE_RAG_LOCAL_WEB_FETCH' option has been reintroduced to restore proper web searching capabilities.
- **💾 RAG Template Not Being Saved**: Fixed an issue where the RAG template was not being saved correctly, ensuring your custom templates are now preserved as expected.
## [0.3.3] - 2024-06-12
### Added
- **🛠️ Native Python Function Calling**: Introducing native Python function calling within Open WebUI. We’ve also included a built-in code editor to seamlessly develop and integrate function code within the 'Tools' workspace. With this, you can significantly enhance your LLM’s capabilities by creating custom RAG pipelines, web search tools, and even agent-like features such as sending Discord messages.
- **🌐 DuckDuckGo Integration**: Added DuckDuckGo as a web search provider, giving you more search options.
- **🌏 Enhanced Translations**: Improved translations for Vietnamese and Chinese languages, making the interface more accessible.
### Fixed
- **🔗 Web Search URL Error Handling**: Fixed the issue where a single URL error would disrupt the data loading process in Web Search mode. Now, such errors will be handled gracefully to ensure uninterrupted data loading.
- **🖥️ Frontend Responsiveness**: Resolved the problem where the frontend would stop responding if the backend encounters an error while downloading a model. Improved error handling to maintain frontend stability.
- **🔧 Dependency Issues in pip**: Fixed issues related to pip installations, ensuring all dependencies are correctly managed to prevent installation errors.
## [0.3.2] - 2024-06-10
### Added
- **🔍 Web Search Query Status**: The web search query will now persist in the results section to aid in easier debugging and tracking of search queries.
- **🌐 New Web Search Provider**: We have added Serply as a new option for web search providers, giving you more choices for your search needs.
- **🌏 Improved Translations**: We've enhanced translations for Chinese and Portuguese.
### Fixed
- **🎤 Audio File Upload Issue**: The bug that prevented audio files from being uploaded in chat input has been fixed, ensuring smooth communication.
- **💬 Message Input Handling**: Improved the handling of message inputs by instantly clearing images and text after sending, along with immediate visual indications when a response message is loading, enhancing user feedback.
- **⚙️ Parameter Registration and Validation**: Fixed the issue where parameters were not registering in certain cases and addressed the problem where users were unable to save due to invalid input errors.
## [0.3.1] - 2024-06-09
### Fixed
- **💬 Chat Functionality**: Resolved the issue where chat functionality was not working for specific models.
## [0.3.0] - 2024-06-09
### Added
- **📚 Knowledge Support for Models**: Attach documents directly to models from the models workspace, enhancing the information available to each model.
- **🎙️ Hands-Free Voice Call Feature**: Initiate voice calls without needing to use your hands, making interactions more seamless.
- **📹 Video Call Feature**: Enable video calls with supported vision models like Llava and GPT-4o, adding a visual dimension to your communications.
- **🎛️ Enhanced UI for Voice Recording**: Improved user interface for the voice recording feature, making it more intuitive and user-friendly.
- **🌐 External STT Support**: Added support for external Speech-To-Text services, providing more flexibility in choosing your STT provider.
- **⚙️ Unified Settings**: Consolidated settings including document settings under a new admin settings section for easier management.
- **🌑 Dark Mode Splash Screen**: A new splash screen for dark mode, ensuring a consistent and visually appealing experience for dark mode users.
- **📥 Upload Pipeline**: Directly upload pipelines from the admin settings > pipelines section, streamlining the pipeline management process.
- **🌍 Improved Language Support**: Enhanced support for Chinese and Ukrainian languages, better catering to a global user base.
### Fixed
- **🛠️ Playground Issue**: Fixed the playground not functioning properly, ensuring a smoother user experience.
- **🔥 Temperature Parameter Issue**: Corrected the issue where the temperature value '0' was not being passed correctly.
- **📝 Prompt Input Clearing**: Resolved prompt input textarea not being cleared right away, ensuring a clean slate for new inputs.
- **✨ Various UI Styling Issues**: Fixed numerous user interface styling problems for a more cohesive look.
- **👥 Active Users Display**: Fixed active users showing active sessions instead of actual users, now reflecting accurate user activity.
- **🌐 Community Platform Compatibility**: The Community Platform is back online and fully compatible with Open WebUI.
### Changed
- **📝 RAG Implementation**: Updated the RAG (Retrieval-Augmented Generation) implementation to use a system prompt for context, instead of overriding the user's prompt.
- **🔄 Settings Relocation**: Moved Models, Connections, Audio, and Images settings to the admin settings for better organization.
- **✍️ Improved Title Generation**: Enhanced the default prompt for title generation, yielding better results.
- **🔧 Backend Task Management**: Tasks like title generation and search query generation are now managed on the backend side and controlled only by the admin.
- **🔍 Editable Search Query Prompt**: You can now edit the search query generation prompt, offering more control over how queries are generated.
- **📏 Prompt Length Threshold**: Set the prompt length threshold for search query generation from the admin settings, giving more customization options.
- **📣 Settings Consolidation**: Merged the Banners admin setting with the Interface admin setting for a more streamlined settings area.
## [0.2.5] - 2024-06-05
### Added
- **👥 Active Users Indicator**: Now you can see how many people are currently active and what they are running. This helps you gauge when performance might slow down due to a high number of users.
- **🗂️ Create Ollama Modelfile**: The option to create a modelfile for Ollama has been reintroduced in the Settings > Models section, making it easier to manage your models.
- **⚙️ Default Model Setting**: Added an option to set the default model from Settings > Interface. This feature is now easily accessible, especially convenient for mobile users as it was previously hidden.
- **🌐 Enhanced Translations**: We've improved the Chinese translations and added support for Turkmen and Norwegian languages to make the interface more accessible globally.
### Fixed
- **📱 Mobile View Improvements**: The UI now uses dvh (dynamic viewport height) instead of vh (viewport height), providing a better and more responsive experience for mobile users.
## [0.2.4] - 2024-06-03
### Added
- **👤 Improved Account Pending Page**: The account pending page now displays admin details by default to avoid confusion. You can disable this feature in the admin settings if needed.
- **🌐 HTTP Proxy Support**: We have enabled the use of the 'http_proxy' environment variable in OpenAI and Ollama API calls, making it easier to configure network settings.
- **❓ Quick Access to Documentation**: You can now easily access the Open WebUI documentation via a question mark button located at the bottom right corner of the screen (available on larger screens like PCs).
- **🌍 Enhanced Translation**: Improvements have been made to translations.
### Fixed
- **🔍 SearxNG Web Search**: Fixed the issue where the SearxNG web search functionality was not working properly.
## [0.2.3] - 2024-06-03
### Added
- **📁 Export Chat as JSON**: You can now export individual chats as JSON files from the navbar menu by navigating to 'Download > Export Chat'. This makes sharing specific conversations easier.
- **✏️ Edit Titles with Double Click**: Double-click on titles to rename them quickly and efficiently.
- **🧩 Batch Multiple Embeddings**: Introduced 'RAG_EMBEDDING_OPENAI_BATCH_SIZE' to process multiple embeddings in a batch, enhancing performance for large datasets.
- **🌍 Improved Translations**: Enhanced the translation quality across various languages for a better user experience.
### Fixed
- **🛠️ Modelfile Migration Script**: Fixed an issue where the modelfile migration script would fail if an invalid modelfile was encountered.
- **💬 Zhuyin Input Method on Mac**: Resolved an issue where using the Zhuyin input method in the Web UI on a Mac caused text to send immediately upon pressing the enter key, leading to incorrect input.
- **🔊 Local TTS Voice Selection**: Fixed the issue where the selected local Text-to-Speech (TTS) voice was not being displayed in settings.
## [0.2.2] - 2024-06-02
### Added
- **🌊 Mermaid Rendering Support**: We've included support for Mermaid rendering. This allows you to create beautiful diagrams and flowcharts directly within Open WebUI.
- **🔄 New Environment Variable 'RESET_CONFIG_ON_START'**: Introducing a new environment variable: 'RESET_CONFIG_ON_START'. Set this variable to reset your configuration settings upon starting the application, making it easier to revert to default settings.
### Fixed
- **🔧 Pipelines Filter Issue**: We've addressed an issue with the pipelines where filters were not functioning as expected.
## [0.2.1] - 2024-06-02
### Added
- **🖱️ Single Model Export Button**: Easily export models with just one click using the new single model export button.
- **🖥️ Advanced Parameters Support**: Added support for 'num_thread', 'use_mmap', and 'use_mlock' parameters for Ollama.
- **🌐 Improved Vietnamese Translation**: Enhanced Vietnamese language support for a better user experience for our Vietnamese-speaking community.
### Fixed
- **🔧 OpenAI URL API Save Issue**: Corrected a problem preventing the saving of OpenAI URL API settings.
- **🚫 Display Issue with Disabled Ollama API**: Fixed the display bug causing models to appear in settings when the Ollama API was disabled.
### Changed
- **💡 Versioning Update**: As a reminder from our previous update, version 0.2.y will focus primarily on bug fixes, while major updates will be designated as 0.x from now on for better version tracking.
## [0.2.0] - 2024-06-01
### Added
- **🔧 Pipelines Support**: Open WebUI now includes a plugin framework for enhanced customization and functionality (https://github.com/open-webui/pipelines). Easily add custom logic and integrate Python libraries, from AI agents to home automation APIs.
- **🔗 Function Calling via Pipelines**: Integrate function calling seamlessly through Pipelines.
- **⚖️ User Rate Limiting via Pipelines**: Implement user-specific rate limits to manage API usage efficiently.
- **📊 Usage Monitoring with Langfuse**: Track and analyze usage statistics with Langfuse integration through Pipelines.
- **🕒 Conversation Turn Limits**: Set limits on conversation turns to manage interactions better through Pipelines.
- **🛡️ Toxic Message Filtering**: Automatically filter out toxic messages to maintain a safe environment using Pipelines.
- **🔍 Web Search Support**: Introducing built-in web search capabilities via RAG API, allowing users to search using SearXNG, Google Programmatic Search Engine, Brave Search, serpstack, and serper. Activate it effortlessly by adding necessary variables from Document settings > Web Params.
- **🗂️ Models Workspace**: Create and manage model presets for both the Ollama and OpenAI APIs. Note: The old Modelfiles workspace is deprecated.
- **🛠️ Model Builder Feature**: Build and edit all models with persistent builder mode.
- **🏷️ Model Tagging Support**: Organize models with tagging features in the models workspace.
- **📋 Model Ordering Support**: Effortlessly organize models by dragging and dropping them into the desired positions within the models workspace.
- **📈 OpenAI Generation Stats**: Access detailed generation statistics for OpenAI models.
- **📅 System Prompt Variables**: New variables added: '{{CURRENT_DATE}}' and '{{USER_NAME}}' for dynamic prompts.
- **📢 Global Banner Support**: Manage global banners from admin settings > banners.
- **🗃️ Enhanced Archived Chats Modal**: Search and export archived chats easily.
- **📂 Archive All Button**: Quickly archive all chats from settings > chats.
- **🌐 Improved Translations**: Added and improved translations for French, Croatian, Cebuano, and Vietnamese.
### Fixed
- **🔍 Archived Chats Visibility**: Resolved issue with archived chats not showing in the admin panel.
- **💬 Message Styling**: Fixed styling issues affecting message appearance.
- **🔗 Shared Chat Responses**: Corrected the issue where shared chat response messages were not readonly.
- **🖥️ UI Enhancement**: Fixed the scrollbar overlapping issue with the message box in the user interface.
### Changed
- **💾 User Settings Storage**: User settings are now saved on the backend, ensuring consistency across all devices.
- **📡 Unified API Requests**: The API request for getting models is now unified to '/api/models' for easier usage.
- **🔄 Versioning Update**: Our versioning will now follow the format 0.x for major updates and 0.x.y for patches.
- **📦 Export All Chats (All Users)**: Moved this functionality to the Admin Panel settings for better organization and accessibility.
### Removed
- **🚫 Bundled LiteLLM Support Deprecated**: Migrate your LiteLLM config.yaml to a self-hosted LiteLLM instance. LiteLLM can still be added via OpenAI Connections. Download the LiteLLM config.yaml from admin settings > database > export LiteLLM config.yaml.
## [0.1.125] - 2024-05-19
### Added
......
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our community include:
- Demonstrating empathy and kindness toward other people
- Being respectful of differing opinions, viewpoints, and experiences
- Giving and gracefully accepting constructive feedback
- Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
- Focusing on what is best not just for us as individuals, but for the overall community
Examples of unacceptable behavior include:
- The use of sexualized language or imagery, and sexual attention or advances of any kind
- Trolling, insulting or derogatory comments, and personal or political attacks
- Public or private harassment
- Publishing others' private information, such as a physical or email address, without their explicit permission
- **Spamming of any kind**
- Aggressive sales tactics targeting our community members are strictly prohibited. You can mention your product if it's relevant to the discussion, but under no circumstances should you push it forcefully
- Other conduct which could reasonably be considered inappropriate in a professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful.
## Scope
This Code of Conduct applies within all community spaces and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, spamming, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at hello@openwebui.com. All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:
### 1. Temporary Ban
**Community Impact**: Any violation of community standards, including but not limited to inappropriate language, unprofessional behavior, harassment, or spamming.
**Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.
### 2. Permanent Ban
**Community Impact**: Repeated or severe violations of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within the community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.
......@@ -38,7 +38,6 @@ ARG USE_OLLAMA
ARG USE_CUDA_VER
ARG USE_EMBEDDING_MODEL
ARG USE_RERANKING_MODEL
ARG BUILD_HASH
ARG UID
ARG GID
......@@ -154,6 +153,7 @@ HEALTHCHECK CMD curl --silent --fail http://localhost:8080/health | jq -e '.stat
USER $UID:$GID
ARG BUILD_HASH
ENV WEBUI_BUILD_VERSION=${BUILD_HASH}
CMD [ "bash", "start.sh"]
......@@ -11,7 +11,7 @@
[![Discord](https://img.shields.io/badge/Discord-Open_WebUI-blue?logo=discord&logoColor=white)](https://discord.gg/5rJgQTnV4s)
[![](https://img.shields.io/static/v1?label=Sponsor&message=%E2%9D%A4&logo=GitHub&color=%23fe8e86)](https://github.com/sponsors/tjbck)
Open WebUI is an extensible, feature-rich, and user-friendly self-hosted WebUI designed to operate entirely offline. It supports various LLM runners, including Ollama and OpenAI-compatible APIs. For more information, be sure to check out our [Open WebUI Documentation](https://docs.openwebui.com/).
Open WebUI is an [extensible](https://github.com/open-webui/pipelines), feature-rich, and user-friendly self-hosted WebUI designed to operate entirely offline. It supports various LLM runners, including Ollama and OpenAI-compatible APIs. For more information, be sure to check out our [Open WebUI Documentation](https://docs.openwebui.com/).
![Open WebUI Demo](./demo.gif)
......@@ -19,7 +19,9 @@ Open WebUI is an extensible, feature-rich, and user-friendly self-hosted WebUI d
- 🚀 **Effortless Setup**: Install seamlessly using Docker or Kubernetes (kubectl, kustomize or helm) for a hassle-free experience with support for both `:ollama` and `:cuda` tagged images.
- 🤝 **OpenAI API Integration**: Effortlessly integrate OpenAI-compatible APIs for versatile conversations alongside Ollama models. Customize the OpenAI API URL to link with **LMStudio, GroqCloud, Mistral, OpenRouter, and more**.
- 🤝 **Ollama/OpenAI API Integration**: Effortlessly integrate OpenAI-compatible APIs for versatile conversations alongside Ollama models. Customize the OpenAI API URL to link with **LMStudio, GroqCloud, Mistral, OpenRouter, and more**.
- 🧩 **Pipelines, Open WebUI Plugin Support**: Seamlessly integrate custom logic and Python libraries into Open WebUI using [Pipelines Plugin Framework](https://github.com/open-webui/pipelines). Launch your Pipelines instance, set the OpenAI URL to the Pipelines URL, and explore endless possibilities. [Examples](https://github.com/open-webui/pipelines/tree/main/examples) include **Function Calling**, User **Rate Limiting** to control access, **Usage Monitoring** with tools like Langfuse, **Live Translation with LibreTranslate** for multilingual support, **Toxic Message Filtering** and much more.
- 📱 **Responsive Design**: Enjoy a seamless experience across Desktop PC, Laptop, and Mobile devices.
......@@ -27,11 +29,15 @@ Open WebUI is an extensible, feature-rich, and user-friendly self-hosted WebUI d
- ✒️🔢 **Full Markdown and LaTeX Support**: Elevate your LLM experience with comprehensive Markdown and LaTeX capabilities for enriched interaction.
- 🧩 **Model Builder**: Easily create Ollama models via the Web UI. Create and add custom characters/agents, customize chat elements, and import models effortlessly through [Open WebUI Community](https://openwebui.com/) integration.
- 🎤📹 **Hands-Free Voice/Video Call**: Experience seamless communication with integrated hands-free voice and video call features, allowing for a more dynamic and interactive chat environment.
- 🛠️ **Model Builder**: Easily create Ollama models via the Web UI. Create and add custom characters/agents, customize chat elements, and import models effortlessly through [Open WebUI Community](https://openwebui.com/) integration.
- 🐍 **Native Python Function Calling Tool**: Enhance your LLMs with built-in code editor support in the tools workspace. Bring Your Own Function (BYOF) by simply adding your pure Python functions, enabling seamless integration with LLMs.
- 📚 **Local RAG Integration**: Dive into the future of chat interactions with groundbreaking Retrieval Augmented Generation (RAG) support. This feature seamlessly integrates document interactions into your chat experience. You can load documents directly into the chat or add files to your document library, effortlessly accessing them using the `#` command before a query.
- 🔍 **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, and `serper`, and inject the results directly into your chat experience.
- 🔍 **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, `serper`, and `Serply`, and inject the results directly into your chat experience.
- 🌐 **Web Browsing Capability**: Seamlessly integrate websites into your chat experience using the `#` command followed by a URL. This feature allows you to incorporate web content directly into your conversations, enhancing the richness and depth of your interactions.
......@@ -144,10 +150,19 @@ docker run --rm --volume /var/run/docker.sock:/var/run/docker.sock containrrr/wa
In the last part of the command, replace `open-webui` with your container name if it is different.
### Moving from Ollama WebUI to Open WebUI
Check our Migration Guide available in our [Open WebUI Documentation](https://docs.openwebui.com/migration/).
### Using the Dev Branch 🌙
> [!WARNING]
> The `:dev` branch contains the latest unstable features and changes. Use it at your own risk as it may have bugs or incomplete features.
If you want to try out the latest bleeding-edge features and are okay with occasional instability, you can use the `:dev` tag like this:
```bash
docker run -d -p 3000:8080 -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:dev
```
## What's Next? 🌟
Discover upcoming features on our roadmap in the [Open WebUI Documentation](https://docs.openwebui.com/roadmap/).
......
......@@ -17,13 +17,12 @@ from fastapi.middleware.cors import CORSMiddleware
from faster_whisper import WhisperModel
from pydantic import BaseModel
import uuid
import requests
import hashlib
from pathlib import Path
import json
from constants import ERROR_MESSAGES
from utils.utils import (
decode_token,
......@@ -41,10 +40,15 @@ from config import (
WHISPER_MODEL_DIR,
WHISPER_MODEL_AUTO_UPDATE,
DEVICE_TYPE,
AUDIO_OPENAI_API_BASE_URL,
AUDIO_OPENAI_API_KEY,
AUDIO_OPENAI_API_MODEL,
AUDIO_OPENAI_API_VOICE,
AUDIO_STT_OPENAI_API_BASE_URL,
AUDIO_STT_OPENAI_API_KEY,
AUDIO_TTS_OPENAI_API_BASE_URL,
AUDIO_TTS_OPENAI_API_KEY,
AUDIO_STT_ENGINE,
AUDIO_STT_MODEL,
AUDIO_TTS_ENGINE,
AUDIO_TTS_MODEL,
AUDIO_TTS_VOICE,
AppConfig,
)
......@@ -61,10 +65,17 @@ app.add_middleware(
)
app.state.config = AppConfig()
app.state.config.OPENAI_API_BASE_URL = AUDIO_OPENAI_API_BASE_URL
app.state.config.OPENAI_API_KEY = AUDIO_OPENAI_API_KEY
app.state.config.OPENAI_API_MODEL = AUDIO_OPENAI_API_MODEL
app.state.config.OPENAI_API_VOICE = AUDIO_OPENAI_API_VOICE
app.state.config.STT_OPENAI_API_BASE_URL = AUDIO_STT_OPENAI_API_BASE_URL
app.state.config.STT_OPENAI_API_KEY = AUDIO_STT_OPENAI_API_KEY
app.state.config.STT_ENGINE = AUDIO_STT_ENGINE
app.state.config.STT_MODEL = AUDIO_STT_MODEL
app.state.config.TTS_OPENAI_API_BASE_URL = AUDIO_TTS_OPENAI_API_BASE_URL
app.state.config.TTS_OPENAI_API_KEY = AUDIO_TTS_OPENAI_API_KEY
app.state.config.TTS_ENGINE = AUDIO_TTS_ENGINE
app.state.config.TTS_MODEL = AUDIO_TTS_MODEL
app.state.config.TTS_VOICE = AUDIO_TTS_VOICE
# setting device type for whisper model
whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
......@@ -74,41 +85,101 @@ SPEECH_CACHE_DIR = Path(CACHE_DIR).joinpath("./audio/speech/")
SPEECH_CACHE_DIR.mkdir(parents=True, exist_ok=True)
class OpenAIConfigUpdateForm(BaseModel):
url: str
key: str
model: str
speaker: str
class TTSConfigForm(BaseModel):
OPENAI_API_BASE_URL: str
OPENAI_API_KEY: str
ENGINE: str
MODEL: str
VOICE: str
class STTConfigForm(BaseModel):
OPENAI_API_BASE_URL: str
OPENAI_API_KEY: str
ENGINE: str
MODEL: str
class AudioConfigUpdateForm(BaseModel):
tts: TTSConfigForm
stt: STTConfigForm
from pydub import AudioSegment
from pydub.utils import mediainfo
def is_mp4_audio(file_path):
"""Check if the given file is an MP4 audio file."""
if not os.path.isfile(file_path):
print(f"File not found: {file_path}")
return False
info = mediainfo(file_path)
if (
info.get("codec_name") == "aac"
and info.get("codec_type") == "audio"
and info.get("codec_tag_string") == "mp4a"
):
return True
return False
def convert_mp4_to_wav(file_path, output_path):
"""Convert MP4 audio file to WAV format."""
audio = AudioSegment.from_file(file_path, format="mp4")
audio.export(output_path, format="wav")
print(f"Converted {file_path} to {output_path}")
@app.get("/config")
async def get_openai_config(user=Depends(get_admin_user)):
async def get_audio_config(user=Depends(get_admin_user)):
return {
"OPENAI_API_BASE_URL": app.state.config.OPENAI_API_BASE_URL,
"OPENAI_API_KEY": app.state.config.OPENAI_API_KEY,
"OPENAI_API_MODEL": app.state.config.OPENAI_API_MODEL,
"OPENAI_API_VOICE": app.state.config.OPENAI_API_VOICE,
"tts": {
"OPENAI_API_BASE_URL": app.state.config.TTS_OPENAI_API_BASE_URL,
"OPENAI_API_KEY": app.state.config.TTS_OPENAI_API_KEY,
"ENGINE": app.state.config.TTS_ENGINE,
"MODEL": app.state.config.TTS_MODEL,
"VOICE": app.state.config.TTS_VOICE,
},
"stt": {
"OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,
"OPENAI_API_KEY": app.state.config.STT_OPENAI_API_KEY,
"ENGINE": app.state.config.STT_ENGINE,
"MODEL": app.state.config.STT_MODEL,
},
}
@app.post("/config/update")
async def update_openai_config(
form_data: OpenAIConfigUpdateForm, user=Depends(get_admin_user)
async def update_audio_config(
form_data: AudioConfigUpdateForm, user=Depends(get_admin_user)
):
if form_data.key == "":
raise HTTPException(status_code=400, detail=ERROR_MESSAGES.API_KEY_NOT_FOUND)
app.state.config.TTS_OPENAI_API_BASE_URL = form_data.tts.OPENAI_API_BASE_URL
app.state.config.TTS_OPENAI_API_KEY = form_data.tts.OPENAI_API_KEY
app.state.config.TTS_ENGINE = form_data.tts.ENGINE
app.state.config.TTS_MODEL = form_data.tts.MODEL
app.state.config.TTS_VOICE = form_data.tts.VOICE
app.state.config.OPENAI_API_BASE_URL = form_data.url
app.state.config.OPENAI_API_KEY = form_data.key
app.state.config.OPENAI_API_MODEL = form_data.model
app.state.config.OPENAI_API_VOICE = form_data.speaker
app.state.config.STT_OPENAI_API_BASE_URL = form_data.stt.OPENAI_API_BASE_URL
app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY
app.state.config.STT_ENGINE = form_data.stt.ENGINE
app.state.config.STT_MODEL = form_data.stt.MODEL
return {
"status": True,
"OPENAI_API_BASE_URL": app.state.config.OPENAI_API_BASE_URL,
"OPENAI_API_KEY": app.state.config.OPENAI_API_KEY,
"OPENAI_API_MODEL": app.state.config.OPENAI_API_MODEL,
"OPENAI_API_VOICE": app.state.config.OPENAI_API_VOICE,
"tts": {
"OPENAI_API_BASE_URL": app.state.config.TTS_OPENAI_API_BASE_URL,
"OPENAI_API_KEY": app.state.config.TTS_OPENAI_API_KEY,
"ENGINE": app.state.config.TTS_ENGINE,
"MODEL": app.state.config.TTS_MODEL,
"VOICE": app.state.config.TTS_VOICE,
},
"stt": {
"OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,
"OPENAI_API_KEY": app.state.config.STT_OPENAI_API_KEY,
"ENGINE": app.state.config.STT_ENGINE,
"MODEL": app.state.config.STT_MODEL,
},
}
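For reviewers trying out the split TTS/STT settings above, here is a minimal client sketch of the nested payload `AudioConfigUpdateForm` accepts. The `/audio/api/v1` mount prefix, the token, and the engine/model/voice values are illustrative assumptions, not values taken from this diff.

```python
# Hedged sketch: exercise the new audio config endpoint with a nested tts/stt payload.
# Assumptions (not from this diff): the audio router is mounted at /audio/api/v1,
# ADMIN_TOKEN is a valid admin token, and the field values below are placeholders.
import requests

ADMIN_TOKEN = "your-admin-token"  # placeholder
payload = {
    "tts": {
        "OPENAI_API_BASE_URL": "https://api.openai.com/v1",
        "OPENAI_API_KEY": "sk-placeholder",
        "ENGINE": "openai",
        "MODEL": "tts-1",
        "VOICE": "alloy",
    },
    "stt": {
        "OPENAI_API_BASE_URL": "https://api.openai.com/v1",
        "OPENAI_API_KEY": "sk-placeholder",
        "ENGINE": "openai",
        "MODEL": "whisper-1",
    },
}

r = requests.post(
    "http://localhost:8080/audio/api/v1/config/update",
    headers={"Authorization": f"Bearer {ADMIN_TOKEN}"},
    json=payload,
)
print(r.json())
```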
......@@ -125,13 +196,21 @@ async def speech(request: Request, user=Depends(get_verified_user)):
return FileResponse(file_path)
headers = {}
headers["Authorization"] = f"Bearer {app.state.config.OPENAI_API_KEY}"
headers["Authorization"] = f"Bearer {app.state.config.TTS_OPENAI_API_KEY}"
headers["Content-Type"] = "application/json"
try:
body = body.decode("utf-8")
body = json.loads(body)
body["model"] = app.state.config.TTS_MODEL
body = json.dumps(body).encode("utf-8")
except Exception as e:
pass
r = None
try:
r = requests.post(
url=f"{app.state.config.OPENAI_API_BASE_URL}/audio/speech",
url=f"{app.state.config.TTS_OPENAI_API_BASE_URL}/audio/speech",
data=body,
headers=headers,
stream=True,
......@@ -181,41 +260,110 @@ def transcribe(
)
try:
filename = file.filename
file_path = f"{UPLOAD_DIR}/{filename}"
ext = file.filename.split(".")[-1]
id = uuid.uuid4()
filename = f"{id}.{ext}"
file_dir = f"{CACHE_DIR}/audio/transcriptions"
os.makedirs(file_dir, exist_ok=True)
file_path = f"{file_dir}/{filename}"
print(filename)
contents = file.file.read()
with open(file_path, "wb") as f:
f.write(contents)
f.close()
whisper_kwargs = {
"model_size_or_path": WHISPER_MODEL,
"device": whisper_device_type,
"compute_type": "int8",
"download_root": WHISPER_MODEL_DIR,
"local_files_only": not WHISPER_MODEL_AUTO_UPDATE,
}
log.debug(f"whisper_kwargs: {whisper_kwargs}")
try:
model = WhisperModel(**whisper_kwargs)
except:
log.warning(
"WhisperModel initialization failed, attempting download with local_files_only=False"
if app.state.config.STT_ENGINE == "":
whisper_kwargs = {
"model_size_or_path": WHISPER_MODEL,
"device": whisper_device_type,
"compute_type": "int8",
"download_root": WHISPER_MODEL_DIR,
"local_files_only": not WHISPER_MODEL_AUTO_UPDATE,
}
log.debug(f"whisper_kwargs: {whisper_kwargs}")
try:
model = WhisperModel(**whisper_kwargs)
except:
log.warning(
"WhisperModel initialization failed, attempting download with local_files_only=False"
)
whisper_kwargs["local_files_only"] = False
model = WhisperModel(**whisper_kwargs)
segments, info = model.transcribe(file_path, beam_size=5)
log.info(
"Detected language '%s' with probability %f"
% (info.language, info.language_probability)
)
whisper_kwargs["local_files_only"] = False
model = WhisperModel(**whisper_kwargs)
segments, info = model.transcribe(file_path, beam_size=5)
log.info(
"Detected language '%s' with probability %f"
% (info.language, info.language_probability)
)
transcript = "".join([segment.text for segment in list(segments)])
transcript = "".join([segment.text for segment in list(segments)])
data = {"text": transcript.strip()}
return {"text": transcript.strip()}
# save the transcript to a json file
transcript_file = f"{file_dir}/{id}.json"
with open(transcript_file, "w") as f:
json.dump(data, f)
print(data)
return data
elif app.state.config.STT_ENGINE == "openai":
if is_mp4_audio(file_path):
print("is_mp4_audio")
os.rename(file_path, file_path.replace(".wav", ".mp4"))
# Convert MP4 audio file to WAV format
convert_mp4_to_wav(file_path.replace(".wav", ".mp4"), file_path)
headers = {"Authorization": f"Bearer {app.state.config.STT_OPENAI_API_KEY}"}
files = {"file": (filename, open(file_path, "rb"))}
data = {"model": "whisper-1"}
print(files, data)
r = None
try:
r = requests.post(
url=f"{app.state.config.STT_OPENAI_API_BASE_URL}/audio/transcriptions",
headers=headers,
files=files,
data=data,
)
r.raise_for_status()
data = r.json()
# save the transcript to a json file
transcript_file = f"{file_dir}/{id}.json"
with open(transcript_file, "w") as f:
json.dump(data, f)
print(data)
return data
except Exception as e:
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
res = r.json()
if "error" in res:
error_detail = f"External: {res['error']['message']}"
except:
error_detail = f"External: {e}"
raise HTTPException(
status_code=r.status_code if r != None else 500,
detail=error_detail,
)
except Exception as e:
log.exception(e)
......
......@@ -9,6 +9,7 @@ import json
import logging
from pydantic import BaseModel
from starlette.background import BackgroundTask
from apps.webui.models.models import Models
from apps.webui.models.users import Users
......@@ -185,7 +186,7 @@ async def fetch_url(url, key):
timeout = aiohttp.ClientTimeout(total=5)
try:
headers = {"Authorization": f"Bearer {key}"}
async with aiohttp.ClientSession(timeout=timeout) as session:
async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
async with session.get(url, headers=headers) as response:
return await response.json()
except Exception as e:
......@@ -194,6 +195,16 @@ async def fetch_url(url, key):
return None
async def cleanup_response(
response: Optional[aiohttp.ClientResponse],
session: Optional[aiohttp.ClientSession],
):
if response:
response.close()
if session:
await session.close()
def merge_models_lists(model_lists):
log.debug(f"merge_models_lists {model_lists}")
merged_list = []
......@@ -228,6 +239,27 @@ async def get_all_models(raw: bool = False):
) or not app.state.config.ENABLE_OPENAI_API:
models = {"data": []}
else:
# Check if API KEYS length is the same as API URLS length
if len(app.state.config.OPENAI_API_KEYS) != len(
app.state.config.OPENAI_API_BASE_URLS
):
# if there are more keys than urls, remove the extra keys
if len(app.state.config.OPENAI_API_KEYS) > len(
app.state.config.OPENAI_API_BASE_URLS
):
app.state.config.OPENAI_API_KEYS = app.state.config.OPENAI_API_KEYS[
: len(app.state.config.OPENAI_API_BASE_URLS)
]
# if there are more urls than keys, add empty keys
else:
app.state.config.OPENAI_API_KEYS += [
""
for _ in range(
len(app.state.config.OPENAI_API_BASE_URLS)
- len(app.state.config.OPENAI_API_KEYS)
)
]
tasks = [
fetch_url(f"{url}/models", app.state.config.OPENAI_API_KEYS[idx])
for idx, url in enumerate(app.state.config.OPENAI_API_BASE_URLS)
......@@ -313,113 +345,155 @@ async def get_models(url_idx: Optional[int] = None, user=Depends(get_current_use
)
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
@app.post("/chat/completions")
@app.post("/chat/completions/{url_idx}")
async def generate_chat_completion(
form_data: dict,
url_idx: Optional[int] = None,
user=Depends(get_verified_user),
):
idx = 0
payload = {**form_data}
body = await request.body()
# TODO: Remove below after gpt-4-vision fix from Open AI
# Try to decode the body of the request from bytes to a UTF-8 string (Require add max_token to fix gpt-4-vision)
model_id = form_data.get("model")
model_info = Models.get_model_by_id(model_id)
payload = None
if model_info:
if model_info.base_model_id:
payload["model"] = model_info.base_model_id
try:
if "chat/completions" in path:
body = body.decode("utf-8")
body = json.loads(body)
model_info.params = model_info.params.model_dump()
if model_info.params:
if model_info.params.get("temperature", None) is not None:
payload["temperature"] = float(model_info.params.get("temperature"))
payload = {**body}
if model_info.params.get("top_p", None):
payload["top_p"] = int(model_info.params.get("top_p", None))
model_id = body.get("model")
model_info = Models.get_model_by_id(model_id)
if model_info.params.get("max_tokens", None):
payload["max_tokens"] = int(model_info.params.get("max_tokens", None))
if model_info:
print(model_info)
if model_info.base_model_id:
payload["model"] = model_info.base_model_id
if model_info.params.get("frequency_penalty", None):
payload["frequency_penalty"] = int(
model_info.params.get("frequency_penalty", None)
)
model_info.params = model_info.params.model_dump()
if model_info.params.get("seed", None):
payload["seed"] = model_info.params.get("seed", None)
if model_info.params.get("stop", None):
payload["stop"] = (
[
bytes(stop, "utf-8").decode("unicode_escape")
for stop in model_info.params["stop"]
]
if model_info.params.get("stop", None)
else None
)
if model_info.params:
if model_info.params.get("temperature", None):
payload["temperature"] = int(
model_info.params.get("temperature")
if model_info.params.get("system", None):
# Check if the payload already has a system message
# If not, add a system message to the payload
if payload.get("messages"):
for message in payload["messages"]:
if message.get("role") == "system":
message["content"] = (
model_info.params.get("system", None) + message["content"]
)
break
else:
payload["messages"].insert(
0,
{
"role": "system",
"content": model_info.params.get("system", None),
},
)
if model_info.params.get("top_p", None):
payload["top_p"] = int(model_info.params.get("top_p", None))
else:
pass
if model_info.params.get("max_tokens", None):
payload["max_tokens"] = int(
model_info.params.get("max_tokens", None)
)
model = app.state.MODELS[payload.get("model")]
idx = model["urlIdx"]
if model_info.params.get("frequency_penalty", None):
payload["frequency_penalty"] = int(
model_info.params.get("frequency_penalty", None)
)
if "pipeline" in model and model.get("pipeline"):
payload["user"] = {"name": user.name, "id": user.id}
if model_info.params.get("seed", None):
payload["seed"] = model_info.params.get("seed", None)
if model_info.params.get("stop", None):
payload["stop"] = (
[
bytes(stop, "utf-8").decode("unicode_escape")
for stop in model_info.params["stop"]
]
if model_info.params.get("stop", None)
else None
)
# Check if the model is "gpt-4-vision-preview" and set "max_tokens" to 4000
# This is a workaround until OpenAI fixes the issue with this model
if payload.get("model") == "gpt-4-vision-preview":
if "max_tokens" not in payload:
payload["max_tokens"] = 4000
log.debug("Modified payload:", payload)
if model_info.params.get("system", None):
# Check if the payload already has a system message
# If not, add a system message to the payload
if payload.get("messages"):
for message in payload["messages"]:
if message.get("role") == "system":
message["content"] = (
model_info.params.get("system", None)
+ message["content"]
)
break
else:
payload["messages"].insert(
0,
{
"role": "system",
"content": model_info.params.get("system", None),
},
)
else:
pass
# Convert the modified body back to JSON
payload = json.dumps(payload)
model = app.state.MODELS[payload.get("model")]
print(payload)
idx = model["urlIdx"]
url = app.state.config.OPENAI_API_BASE_URLS[idx]
key = app.state.config.OPENAI_API_KEYS[idx]
if "pipeline" in model and model.get("pipeline"):
payload["user"] = {"name": user.name, "id": user.id}
payload["title"] = (
True
if payload["stream"] == False and payload["max_tokens"] == 50
else False
)
print(payload)
# Check if the model is "gpt-4-vision-preview" and set "max_tokens" to 4000
# This is a workaround until OpenAI fixes the issue with this model
if payload.get("model") == "gpt-4-vision-preview":
if "max_tokens" not in payload:
payload["max_tokens"] = 4000
log.debug("Modified payload:", payload)
headers = {}
headers["Authorization"] = f"Bearer {key}"
headers["Content-Type"] = "application/json"
# Convert the modified body back to JSON
payload = json.dumps(payload)
r = None
session = None
streaming = False
except json.JSONDecodeError as e:
log.error("Error loading request body into a dictionary:", e)
try:
session = aiohttp.ClientSession(trust_env=True)
r = await session.request(
method="POST",
url=f"{url}/chat/completions",
data=payload,
headers=headers,
)
print(payload)
r.raise_for_status()
# Check if response is SSE
if "text/event-stream" in r.headers.get("Content-Type", ""):
streaming = True
return StreamingResponse(
r.content,
status_code=r.status,
headers=dict(r.headers),
background=BackgroundTask(
cleanup_response, response=r, session=session
),
)
else:
response_data = await r.json()
return response_data
except Exception as e:
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
res = await r.json()
print(res)
if "error" in res:
error_detail = f"External: {res['error']['message'] if 'message' in res['error'] else res['error']}"
except:
error_detail = f"External: {e}"
raise HTTPException(status_code=r.status if r else 500, detail=error_detail)
finally:
if not streaming and session:
if r:
r.close()
await session.close()
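As a rough illustration of the new dedicated route (which replaces the generic proxy for chat requests), a hypothetical client call might look like the sketch below; the `/openai` mount prefix, the token, and the model id are assumptions for illustration only.

```python
# Hedged sketch: call the new /chat/completions route added above.
# Assumptions (not from this diff): the router is mounted under /openai,
# TOKEN is a valid Open WebUI user token, and "gpt-4o" stands in for any
# model id known to the server (presets resolve base_model_id server-side).
import requests

TOKEN = "your-openwebui-token"  # placeholder
body = {
    "model": "gpt-4o",
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": False,
    "max_tokens": 256,
}

r = requests.post(
    "http://localhost:8080/openai/chat/completions",
    headers={"Authorization": f"Bearer {TOKEN}"},
    json=body,
)
print(r.json())
```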
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
idx = 0
body = await request.body()
url = app.state.config.OPENAI_API_BASE_URLS[idx]
key = app.state.config.OPENAI_API_KEYS[idx]
......@@ -431,40 +505,48 @@ async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
headers["Content-Type"] = "application/json"
r = None
session = None
streaming = False
try:
r = requests.request(
session = aiohttp.ClientSession(trust_env=True)
r = await session.request(
method=request.method,
url=target_url,
data=payload if payload else body,
data=body,
headers=headers,
stream=True,
)
r.raise_for_status()
# Check if response is SSE
if "text/event-stream" in r.headers.get("Content-Type", ""):
streaming = True
return StreamingResponse(
r.iter_content(chunk_size=8192),
status_code=r.status_code,
r.content,
status_code=r.status,
headers=dict(r.headers),
background=BackgroundTask(
cleanup_response, response=r, session=session
),
)
else:
response_data = r.json()
response_data = await r.json()
return response_data
except Exception as e:
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
res = r.json()
res = await r.json()
print(res)
if "error" in res:
error_detail = f"External: {res['error']['message'] if 'message' in res['error'] else res['error']}"
except:
error_detail = f"External: {e}"
raise HTTPException(
status_code=r.status_code if r else 500, detail=error_detail
)
raise HTTPException(status_code=r.status if r else 500, detail=error_detail)
finally:
if not streaming and session:
if r:
r.close()
await session.close()
......@@ -3,13 +3,13 @@ import logging
import requests
from apps.rag.search.main import SearchResult
from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
from config import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_brave(api_key: str, query: str) -> list[SearchResult]:
def search_brave(api_key: str, query: str, count: int) -> list[SearchResult]:
"""Search using Brave's Search API and return the results as a list of SearchResult objects.
Args:
......@@ -22,7 +22,7 @@ def search_brave(api_key: str, query: str) -> list[SearchResult]:
"Accept-Encoding": "gzip",
"X-Subscription-Token": api_key,
}
params = {"q": query, "count": RAG_WEB_SEARCH_RESULT_COUNT}
params = {"q": query, "count": count}
response = requests.get(url, headers=headers, params=params)
response.raise_for_status()
......@@ -33,5 +33,5 @@ def search_brave(api_key: str, query: str) -> list[SearchResult]:
SearchResult(
link=result["url"], title=result.get("title"), snippet=result.get("snippet")
)
for result in results[:RAG_WEB_SEARCH_RESULT_COUNT]
for result in results[:count]
]
import logging
from apps.rag.search.main import SearchResult
from duckduckgo_search import DDGS
from config import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_duckduckgo(query: str, count: int) -> list[SearchResult]:
"""
Search using DuckDuckGo's Search API and return the results as a list of SearchResult objects.
Args:
query (str): The query to search for
count (int): The number of results to return
Returns:
List[SearchResult]: A list of search results
"""
# Use the DDGS context manager to create a DDGS object
with DDGS() as ddgs:
# Use the ddgs.text() method to perform the search
ddgs_gen = ddgs.text(
query, safesearch="moderate", max_results=count, backend="api"
)
# Check if there are search results
if ddgs_gen:
# Convert the search results into a list
search_results = [r for r in ddgs_gen]
# Create an empty list to store the SearchResult objects
results = []
# Iterate over each search result
for result in search_results:
# Create a SearchResult object and append it to the results list
results.append(
SearchResult(
link=result["href"],
title=result.get("title"),
snippet=result.get("body"),
)
)
print(results)
# Return the list of search results
return results
......@@ -4,14 +4,14 @@ import logging
import requests
from apps.rag.search.main import SearchResult
from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
from config import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_google_pse(
api_key: str, search_engine_id: str, query: str
api_key: str, search_engine_id: str, query: str, count: int
) -> list[SearchResult]:
"""Search using Google's Programmable Search Engine API and return the results as a list of SearchResult objects.
......@@ -27,7 +27,7 @@ def search_google_pse(
"cx": search_engine_id,
"q": query,
"key": api_key,
"num": RAG_WEB_SEARCH_RESULT_COUNT,
"num": count,
}
response = requests.request("GET", url, headers=headers, params=params)
......
import logging
import requests
from typing import List
from apps.rag.search.main import SearchResult
from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
from config import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_searxng(query_url: str, query: str) -> list[SearchResult]:
"""Search a SearXNG instance for a query and return the results as a list of SearchResult objects.
def search_searxng(
query_url: str, query: str, count: int, **kwargs
) -> List[SearchResult]:
"""
Search a SearXNG instance for a given query and return the results as a list of SearchResult objects.
The function allows passing additional parameters such as language or time_range to tailor the search results.
Args:
query_url (str): The URL of the SearXNG instance to search. Must contain "<query>" as a placeholder
query (str): The query to search for
query_url (str): The base URL of the SearXNG server.
query (str): The search term or question to find in the SearXNG database.
count (int): The maximum number of results to retrieve from the search.
Keyword Args:
language (str): Language filter for the search results; e.g., "en-US". Defaults to an empty string.
safesearch (int): Safe search filter for safer web results; 0 = off, 1 = moderate, 2 = strict. Defaults to 1 (moderate).
time_range (str): Time range for filtering results by date; e.g., "2023-04-05..today" or "all-time". Defaults to ''.
categories (Optional[List[str]]): Specific categories within which the search should be performed, defaulting to an empty string if not provided.
Returns:
List[SearchResult]: A list of SearchResults sorted by relevance score in descending order.
Raises:
requests.exceptions.RequestException: If a request error occurs during the search process.
"""
url = query_url.replace("<query>", query)
if "&format=json" not in url:
url += "&format=json"
log.debug(f"searching {url}")
r = requests.get(
url,
# Default values for optional parameters are provided as empty strings or None when not specified.
language = kwargs.get("language", "en-US")
safesearch = kwargs.get("safesearch", "1")
time_range = kwargs.get("time_range", "")
categories = "".join(kwargs.get("categories", []))
params = {
"q": query,
"format": "json",
"pageno": 1,
"safesearch": safesearch,
"language": language,
"time_range": time_range,
"categories": categories,
"theme": "simple",
"image_proxy": 0,
}
# Legacy query format
if "<query>" in query_url:
# Strip all query parameters from the URL
query_url = query_url.split("?")[0]
log.debug(f"searching {query_url}")
response = requests.get(
query_url,
headers={
"User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) RAG Bot",
"Accept": "text/html",
......@@ -30,15 +70,17 @@ def search_searxng(query_url: str, query: str) -> list[SearchResult]:
"Accept-Language": "en-US,en;q=0.5",
"Connection": "keep-alive",
},
params=params,
)
r.raise_for_status()
json_response = r.json()
response.raise_for_status() # Raise an exception for HTTP errors.
json_response = response.json()
results = json_response.get("results", [])
sorted_results = sorted(results, key=lambda x: x.get("score", 0), reverse=True)
return [
SearchResult(
link=result["url"], title=result.get("title"), snippet=result.get("content")
)
for result in sorted_results[:RAG_WEB_SEARCH_RESULT_COUNT]
for result in sorted_results[:count]
]
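To make the new `count` parameter and keyword arguments concrete, a brief usage sketch follows; the module path mirrors the sibling imports in this package and the instance URL is a placeholder.

```python
# Hedged sketch: call the updated search_searxng with the new count/kwargs.
# Assumptions (not from this diff): the module lives at apps.rag.search.searxng
# and http://searxng.local:8080/search points at a reachable SearXNG instance.
from apps.rag.search.searxng import search_searxng

results = search_searxng(
    query_url="http://searxng.local:8080/search",
    query="open webui pipelines",
    count=5,
    language="en-US",
    safesearch=1,
    time_range="",
    categories=["general"],
)
for result in results:
    print(result.link, result.title)
```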
......@@ -4,13 +4,13 @@ import logging
import requests
from apps.rag.search.main import SearchResult
from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
from config import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_serper(api_key: str, query: str) -> list[SearchResult]:
def search_serper(api_key: str, query: str, count: int) -> list[SearchResult]:
"""Search using serper.dev's API and return the results as a list of SearchResult objects.
Args:
......@@ -35,5 +35,5 @@ def search_serper(api_key: str, query: str) -> list[SearchResult]:
title=result.get("title"),
snippet=result.get("description"),
)
for result in results[:RAG_WEB_SEARCH_RESULT_COUNT]
for result in results[:count]
]
import json
import logging
import requests
from urllib.parse import urlencode
from apps.rag.search.main import SearchResult
from config import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_serply(
api_key: str,
query: str,
count: int,
hl: str = "us",
limit: int = 10,
device_type: str = "desktop",
proxy_location: str = "US",
) -> list[SearchResult]:
"""Search using serper.dev's API and return the results as a list of SearchResult objects.
Args:
api_key (str): A serply.io API key
query (str): The query to search for
hl (str): Host Language code to display results in (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
limit (int): The maximum number of results to return [10-100, defaults to 10]
"""
log.info("Searching with Serply")
url = "https://api.serply.io/v1/search/"
query_payload = {
"q": query,
"language": "en",
"num": limit,
"gl": proxy_location.upper(),
"hl": hl.lower(),
}
url = f"{url}{urlencode(query_payload)}"
headers = {
"X-API-KEY": api_key,
"X-User-Agent": device_type,
"User-Agent": "open-webui",
"X-Proxy-Location": proxy_location,
}
response = requests.request("GET", url, headers=headers)
response.raise_for_status()
json_response = response.json()
log.info(f"results from serply search: {json_response}")
results = sorted(
json_response.get("results", []), key=lambda x: x.get("realPosition", 0)
)
return [
SearchResult(
link=result["link"],
title=result.get("title"),
snippet=result.get("description"),
)
for result in results[:count]
]
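And a similarly hedged usage sketch for the new Serply provider; the module path and API key are assumptions for illustration.

```python
# Hedged sketch: call the new Serply provider.
# Assumptions (not from this diff): module path apps.rag.search.serply and a
# placeholder API key; `limit` is what Serply is asked for, `count` is what we keep.
from apps.rag.search.serply import search_serply

results = search_serply(
    api_key="SERPLY_API_KEY_PLACEHOLDER",
    query="open webui",
    count=5,
    hl="us",
    limit=10,
    device_type="desktop",
    proxy_location="US",
)
for result in results:
    print(result.link, result.title)
```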
......@@ -4,14 +4,14 @@ import logging
import requests
from apps.rag.search.main import SearchResult
from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
from config import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_serpstack(
api_key: str, query: str, https_enabled: bool = True
api_key: str, query: str, count: int, https_enabled: bool = True
) -> list[SearchResult]:
"""Search using serpstack.com's and return the results as a list of SearchResult objects.
......@@ -39,5 +39,5 @@ def search_serpstack(
SearchResult(
link=result["url"], title=result.get("title"), snippet=result.get("snippet")
)
for result in results[:RAG_WEB_SEARCH_RESULT_COUNT]
for result in results[:count]
]
{
"ads": [],
"ads_count": 0,
"answers": [],
"results": [
{
"title": "Apple",
"link": "https://www.apple.com/",
"description": "Discover the innovative world of Apple and shop everything iPhone, iPad, Apple Watch, Mac, and Apple TV, plus explore accessories, entertainment, ...",
"additional_links": [
{
"text": "AppleApplehttps://www.apple.com",
"href": "https://www.apple.com/"
}
],
"cite": {},
"subdomains": [
{
"title": "Support",
"link": "https://support.apple.com/",
"description": "SupportContact - iPhone Support - Billing and Subscriptions - Apple Repair"
},
{
"title": "Store",
"link": "https://www.apple.com/store",
"description": "StoreShop iPhone - Shop iPad - App Store - Shop Mac - ..."
},
{
"title": "Mac",
"link": "https://www.apple.com/mac/",
"description": "MacMacBook Air - MacBook Pro - iMac - Compare Mac models - Mac mini"
},
{
"title": "iPad",
"link": "https://www.apple.com/ipad/",
"description": "iPadShop iPad - iPad Pro - iPad Air - Compare iPad models - ..."
},
{
"title": "Watch",
"link": "https://www.apple.com/watch/",
"description": "WatchShop Apple Watch - Series 9 - SE - Ultra 2 - Nike - Hermès - ..."
}
],
"realPosition": 1
},
{
"title": "Apple",
"link": "https://www.apple.com/",
"description": "Discover the innovative world of Apple and shop everything iPhone, iPad, Apple Watch, Mac, and Apple TV, plus explore accessories, entertainment, ...",
"additional_links": [
{
"text": "AppleApplehttps://www.apple.com",
"href": "https://www.apple.com/"
}
],
"cite": {},
"realPosition": 2
},
{
"title": "Apple Inc.",
"link": "https://en.wikipedia.org/wiki/Apple_Inc.",
"description": "Apple Inc. (formerly Apple Computer, Inc.) is an American multinational corporation and technology company headquartered in Cupertino, California, ...",
"additional_links": [
{
"text": "Apple Inc.Wikipediahttps://en.wikipedia.org › wiki › Apple_Inc",
"href": "https://en.wikipedia.org/wiki/Apple_Inc."
},
{
"text": "",
"href": "https://en.wikipedia.org/wiki/Apple_Inc."
},
{
"text": "History",
"href": "https://en.wikipedia.org/wiki/History_of_Apple_Inc."
},
{
"text": "List of Apple products",
"href": "https://en.wikipedia.org/wiki/List_of_Apple_products"
},
{
"text": "Litigation involving Apple Inc.",
"href": "https://en.wikipedia.org/wiki/Litigation_involving_Apple_Inc."
},
{
"text": "Apple Park",
"href": "https://en.wikipedia.org/wiki/Apple_Park"
}
],
"cite": {
"domain": "https://en.wikipedia.org › wiki › Apple_Inc",
"span": " › wiki › Apple_Inc"
},
"realPosition": 3
},
{
"title": "Apple Inc. (AAPL) Company Profile & Facts",
"link": "https://finance.yahoo.com/quote/AAPL/profile/",
"description": "Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line ...",
"additional_links": [
{
"text": "Apple Inc. (AAPL) Company Profile & FactsYahoo Financehttps://finance.yahoo.com › quote › AAPL › profile",
"href": "https://finance.yahoo.com/quote/AAPL/profile/"
}
],
"cite": {
"domain": "https://finance.yahoo.com › quote › AAPL › profile",
"span": " › quote › AAPL › profile"
},
"realPosition": 4
},
{
"title": "Apple Inc - Company Profile and News",
"link": "https://www.bloomberg.com/profile/company/AAPL:US",
"description": "Apple Inc. Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables and accessories, and sells a variety of related ...",
"additional_links": [
{
"text": "Apple Inc - Company Profile and NewsBloomberghttps://www.bloomberg.com › company › AAPL:US",
"href": "https://www.bloomberg.com/profile/company/AAPL:US"
},
{
"text": "",
"href": "https://www.bloomberg.com/profile/company/AAPL:US"
}
],
"cite": {
"domain": "https://www.bloomberg.com › company › AAPL:US",
"span": " › company › AAPL:US"
},
"realPosition": 5
},
{
"title": "Apple Inc. | History, Products, Headquarters, & Facts",
"link": "https://www.britannica.com/money/Apple-Inc",
"description": "May 22, 2024 — Apple Inc. is an American multinational technology company that revolutionized the technology sector through its innovation of computer ...",
"additional_links": [
{
"text": "Apple Inc. | History, Products, Headquarters, & FactsBritannicahttps://www.britannica.com › money › Apple-Inc",
"href": "https://www.britannica.com/money/Apple-Inc"
},
{
"text": "",
"href": "https://www.britannica.com/money/Apple-Inc"
}
],
"cite": {
"domain": "https://www.britannica.com › money › Apple-Inc",
"span": " › money › Apple-Inc"
},
"realPosition": 6
}
],
"shopping_ads": [],
"places": [
{
"title": "Apple Inc."
},
{
"title": "Apple Inc"
},
{
"title": "Apple Inc"
}
],
"related_searches": {
"images": [],
"text": [
{
"title": "apple inc full form",
"link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+Inc+full+form&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhPEAE"
},
{
"title": "apple company history",
"link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+company+history&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhOEAE"
},
{
"title": "apple store",
"link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+Store&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhQEAE"
},
{
"title": "apple id",
"link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+id&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhSEAE"
},
{
"title": "apple inc industry",
"link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+Inc+industry&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhREAE"
},
{
"title": "apple login",
"link": "https://www.google.com/search?sca_esv=6b6df170a5c9891b&sca_upv=1&q=Apple+login&sa=X&ved=2ahUKEwjLxuSJwM-GAxUHODQIHYuJBhgQ1QJ6BAhTEAE"
}
]
},
"image_results": [],
"carousel": [],
"total": 2450000000,
"knowledge_graph": "",
"related_questions": [
"What does the Apple Inc do?",
"Why did Apple change to Apple Inc?",
"Who owns Apple Inc.?",
"What is Apple Inc best known for?"
],
"carousel_count": 0,
"ts": 2.491065263748169,
"device_type": null
}
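The JSON above is a sample Serply response; a short sketch of how its `results` array reduces to `SearchResult` objects, following the same sort-and-slice logic as `search_serply` (the fixture filename is hypothetical):

```python
import json
from apps.rag.search.main import SearchResult

with open("apple.json") as f:  # hypothetical path to the fixture above
    payload = json.load(f)

ordered = sorted(payload.get("results", []), key=lambda x: x.get("realPosition", 0))
top = [
    SearchResult(link=r["link"], title=r.get("title"), snippet=r.get("description"))
    for r in ordered[:3]
]
# top[0].link == "https://www.apple.com/"
# top[2].link == "https://en.wikipedia.org/wiki/Apple_Inc."
```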
......@@ -2,7 +2,7 @@ import os
import logging
import requests
from typing import List
from typing import List, Union
from apps.ollama.main import (
generate_ollama_embeddings,
......@@ -20,23 +20,8 @@ from langchain.retrievers import (
from typing import Optional
from apps.rag.search.brave import search_brave
from apps.rag.search.google_pse import search_google_pse
from apps.rag.search.main import SearchResult
from apps.rag.search.searxng import search_searxng
from apps.rag.search.serper import search_serper
from apps.rag.search.serpstack import search_serpstack
from config import (
SRC_LOG_LEVELS,
CHROMA_CLIENT,
SEARXNG_QUERY_URL,
GOOGLE_PSE_API_KEY,
GOOGLE_PSE_ENGINE_ID,
BRAVE_SEARCH_API_KEY,
SERPSTACK_API_KEY,
SERPSTACK_HTTPS,
SERPER_API_KEY,
)
from utils.misc import get_last_user_message, add_or_update_system_message
from config import SRC_LOG_LEVELS, CHROMA_CLIENT
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
......@@ -214,6 +199,7 @@ def get_embedding_function(
embedding_function,
openai_key,
openai_url,
batch_size,
):
if embedding_engine == "":
return lambda query: embedding_function.encode(query).tolist()
......@@ -237,17 +223,22 @@ def get_embedding_function(
def generate_multiple(query, f):
if isinstance(query, list):
return [f(q) for q in query]
if embedding_engine == "openai":
embeddings = []
for i in range(0, len(query), batch_size):
embeddings.extend(f(query[i : i + batch_size]))
return embeddings
else:
return [f(q) for q in query]
else:
return f(query)
return lambda query: generate_multiple(query, func)
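To make the new `batch_size` behaviour concrete: for the OpenAI engine, a list of inputs is now embedded in chunks instead of one call per string. A small illustration with made-up values:

```python
# Illustrative only: with batch_size=2, six texts produce three embedding requests.
texts = ["a", "b", "c", "d", "e", "f"]
batch_size = 2
batches = [texts[i : i + batch_size] for i in range(0, len(texts), batch_size)]
assert batches == [["a", "b"], ["c", "d"], ["e", "f"]]
# Each batch goes to the engine function f(), and the resulting embeddings are
# concatenated, so the caller still receives one embedding per input, in order.
```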
def rag_messages(
def get_rag_context(
docs,
messages,
template,
embedding_function,
k,
reranking_function,
......@@ -255,31 +246,7 @@ def rag_messages(
hybrid_search,
):
log.debug(f"docs: {docs} {messages} {embedding_function} {reranking_function}")
last_user_message_idx = None
for i in range(len(messages) - 1, -1, -1):
if messages[i]["role"] == "user":
last_user_message_idx = i
break
user_message = messages[last_user_message_idx]
if isinstance(user_message["content"], list):
# Handle list content input
content_type = "list"
query = ""
for content_item in user_message["content"]:
if content_item["type"] == "text":
query = content_item["text"]
break
elif isinstance(user_message["content"], str):
# Handle text content input
content_type = "text"
query = user_message["content"]
else:
# Fallback in case the input does not match expected types
content_type = None
query = ""
query = get_last_user_message(messages)
extracted_collections = []
relevant_contexts = []
......@@ -350,33 +317,7 @@ def rag_messages(
context_string = context_string.strip()
ra_content = rag_template(
template=template,
context=context_string,
query=query,
)
log.debug(f"ra_content: {ra_content}")
if content_type == "list":
new_content = []
for content_item in user_message["content"]:
if content_item["type"] == "text":
# Update the text item's content with ra_content
new_content.append({"type": "text", "text": ra_content})
else:
# Keep other types of content as they are
new_content.append(content_item)
new_user_message = {**user_message, "content": new_content}
else:
new_user_message = {
**user_message,
"content": ra_content,
}
messages[last_user_message_idx] = new_user_message
return messages, citations
return context_string, citations
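Because the helper now returns the raw context and citations instead of rewriting the message list, the caller is responsible for building the final prompt. A rough sketch of that flow, assuming the utilities imported in this module (`get_last_user_message`, `rag_template`, `add_or_update_system_message`); the exact injection point in the caller is an assumption, not part of this diff:

```python
# Sketch only: keyword names mirror the signature above; any parameters elided
# in the diff are passed the same way.
query = get_last_user_message(messages)
context_string, citations = get_rag_context(
    docs=docs,
    messages=messages,
    template=template,
    embedding_function=embedding_function,
    k=k,
    reranking_function=reranking_function,
    hybrid_search=hybrid_search,
)
if context_string:
    ra_content = rag_template(template=template, context=context_string, query=query)
    # Assumed injection point: attach the RAG context as a system message.
    messages = add_or_update_system_message(ra_content, messages)
```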
def get_model_path(model: str, update_model: bool = False):
......@@ -418,8 +359,22 @@ def get_model_path(model: str, update_model: bool = False):
def generate_openai_embeddings(
model: str, text: str, key: str, url: str = "https://api.openai.com/v1"
model: str,
text: Union[str, list[str]],
key: str,
url: str = "https://api.openai.com/v1",
):
if isinstance(text, list):
embeddings = generate_openai_batch_embeddings(model, text, key, url)
else:
embeddings = generate_openai_batch_embeddings(model, [text], key, url)
return embeddings[0] if isinstance(text, str) else embeddings
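The wrapper now accepts either a single string or a list of strings; a brief usage sketch with a placeholder model name and key:

```python
# Placeholder model name and key; behaviour follows the Union[str, list[str]] handling above.
single = generate_openai_embeddings("text-embedding-ada-002", "hello world", key="YOUR_OPENAI_KEY")
# -> a single embedding vector (list[float])

batch = generate_openai_embeddings(
    "text-embedding-ada-002", ["hello", "world"], key="YOUR_OPENAI_KEY"
)
# -> a list of embedding vectors, one per input string
```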
def generate_openai_batch_embeddings(
model: str, texts: list[str], key: str, url: str = "https://api.openai.com/v1"
) -> Optional[list[list[float]]]:
try:
r = requests.post(
f"{url}/embeddings",
......@@ -427,12 +382,12 @@ def generate_openai_embeddings(
"Content-Type": "application/json",
"Authorization": f"Bearer {key}",
},
json={"input": text, "model": model},
json={"input": texts, "model": model},
)
r.raise_for_status()
data = r.json()
if "data" in data:
return data["data"][0]["embedding"]
return [elem["embedding"] for elem in data["data"]]
else:
raise "Something went wrong :/"
except Exception as e:
......@@ -536,31 +491,3 @@ class RerankCompressor(BaseDocumentCompressor):
)
final_results.append(doc)
return final_results
def search_web(query: str) -> list[SearchResult]:
"""Search the web using a search engine and return the results as a list of SearchResult objects.
Will look for a search engine API key in environment variables in the following order:
- SEARXNG_QUERY_URL
- GOOGLE_PSE_API_KEY + GOOGLE_PSE_ENGINE_ID
- BRAVE_SEARCH_API_KEY
- SERPSTACK_API_KEY
- SERPER_API_KEY
Args:
query (str): The query to search for
"""
# TODO: add playwright to search the web
if SEARXNG_QUERY_URL:
return search_searxng(SEARXNG_QUERY_URL, query)
elif GOOGLE_PSE_API_KEY and GOOGLE_PSE_ENGINE_ID:
return search_google_pse(GOOGLE_PSE_API_KEY, GOOGLE_PSE_ENGINE_ID, query)
elif BRAVE_SEARCH_API_KEY:
return search_brave(BRAVE_SEARCH_API_KEY, query)
elif SERPSTACK_API_KEY:
return search_serpstack(SERPSTACK_API_KEY, query, https_enabled=SERPSTACK_HTTPS)
elif SERPER_API_KEY:
return search_serper(SERPER_API_KEY, query)
else:
raise Exception("No search engine API key found in environment variables")