ai

Running

App Files Files Community

github-actions[bot] commited on 10 days ago

Commit

d836a53

•

0 Parent(s):

GitHub deploy: d48a234bb91a479b618f7828665bdfa45fdc349b

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.dockerignore +20 -0
.env.example +13 -0
.eslintignore +13 -0
.eslintrc.cjs +31 -0
.gitattributes +3 -0
.github/FUNDING.yml +1 -0
.github/ISSUE_TEMPLATE/bug_report.md +80 -0
.github/ISSUE_TEMPLATE/feature_request.md +35 -0
.github/dependabot.yml +12 -0
.github/pull_request_template.md +72 -0
.github/workflows/build-release.yml +72 -0
.github/workflows/deploy-to-hf-spaces.yml +63 -0
.github/workflows/docker-build.yaml +477 -0
.github/workflows/format-backend.yaml +39 -0
.github/workflows/format-build-frontend.yaml +57 -0
.github/workflows/integration-test.yml +253 -0
.github/workflows/lint-backend.disabled +27 -0
.github/workflows/lint-frontend.disabled +21 -0
.github/workflows/release-pypi.yml +32 -0
.gitignore +310 -0
.npmrc +1 -0
.prettierignore +316 -0
.prettierrc +9 -0
CHANGELOG.md +0 -0
CODE_OF_CONDUCT.md +99 -0
Caddyfile.localhost +64 -0
Dockerfile +176 -0
INSTALLATION.md +35 -0
LICENSE +21 -0
Makefile +33 -0
README.md +221 -0
TROUBLESHOOTING.md +36 -0
backend/.dockerignore +14 -0
backend/.gitignore +12 -0
backend/dev.sh +2 -0
backend/open_webui/__init__.py +77 -0
backend/open_webui/alembic.ini +114 -0
backend/open_webui/apps/audio/main.py +703 -0
backend/open_webui/apps/images/main.py +609 -0
backend/open_webui/apps/images/utils/comfyui.py +186 -0
backend/open_webui/apps/ollama/main.py +1350 -0
backend/open_webui/apps/openai/main.py +718 -0
backend/open_webui/apps/retrieval/loaders/main.py +190 -0
backend/open_webui/apps/retrieval/loaders/youtube.py +117 -0
backend/open_webui/apps/retrieval/main.py +1494 -0
backend/open_webui/apps/retrieval/models/colbert.py +81 -0
backend/open_webui/apps/retrieval/utils.py +532 -0
backend/open_webui/apps/retrieval/vector/connector.py +22 -0
backend/open_webui/apps/retrieval/vector/dbs/chroma.py +174 -0
backend/open_webui/apps/retrieval/vector/dbs/milvus.py +286 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,20 @@

+.github
+.DS_Store
+docs
+kubernetes
+node_modules
+/.svelte-kit
+/package
+.env
+.env.*
+vite.config.js.timestamp-*
+vite.config.ts.timestamp-*
+__pycache__
+.idea
+venv
+_old
+uploads
+.ipynb_checkpoints
+**/*.db
+_test
+backend/data/*

.env.example ADDED Viewed

	@@ -0,0 +1,13 @@

+# Ollama URL for the backend to connect
+# The path '/ollama' will be redirected to the specified backend URL
+OLLAMA_BASE_URL='http://localhost:11434'
+OPENAI_API_BASE_URL=''
+OPENAI_API_KEY=''
+# AUTOMATIC1111_BASE_URL="http://localhost:7860"
+# DO NOT TRACK
+SCARF_NO_ANALYTICS=true
+DO_NOT_TRACK=true
+ANONYMIZED_TELEMETRY=false

.eslintignore ADDED Viewed

	@@ -0,0 +1,13 @@

+.DS_Store
+node_modules
+/build
+/.svelte-kit
+/package
+.env
+.env.*
+!.env.example
+# Ignore files for PNPM, NPM and YARN
+pnpm-lock.yaml
+package-lock.json
+yarn.lock

.eslintrc.cjs ADDED Viewed

	@@ -0,0 +1,31 @@

+module.exports = {
+	root: true,
+	extends: [
+		'eslint:recommended',
+		'plugin:@typescript-eslint/recommended',
+		'plugin:svelte/recommended',
+		'plugin:cypress/recommended',
+		'prettier'
+	],
+	parser: '@typescript-eslint/parser',
+	plugins: ['@typescript-eslint'],
+	parserOptions: {
+		sourceType: 'module',
+		ecmaVersion: 2020,
+		extraFileExtensions: ['.svelte']
+	},
+	env: {
+		browser: true,
+		es2017: true,
+		node: true
+	},
+	overrides: [
+		{
+			files: ['*.svelte'],
+			parser: 'svelte-eslint-parser',
+			parserOptions: {
+				parser: '@typescript-eslint/parser'
+			}
+		}
+	]
+};

.gitattributes ADDED Viewed

	@@ -0,0 +1,3 @@

+*.sh text eol=lf
+*.ttf filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text

.github/FUNDING.yml ADDED Viewed

	@@ -0,0 +1 @@


1	+ github: tjbck

.github/ISSUE_TEMPLATE/bug_report.md ADDED Viewed

	@@ -0,0 +1,80 @@

+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: ''
+assignees: ''
+---
+# Bug Report
+## Important Notes
+- **Before submitting a bug report**: Please check the Issues or Discussions section to see if a similar issue or feature request has already been posted. It's likely we're already tracking it! If you’re unsure, start a discussion post first. This will help us efficiently focus on improving the project.
+- **Collaborate respectfully**: We value a constructive attitude, so please be mindful of your communication. If negativity is part of your approach, our capacity to engage may be limited. We’re here to help if you’re open to learning and communicating positively. Remember, Open WebUI is a volunteer-driven project managed by a single maintainer and supported by contributors who also have full-time jobs. We appreciate your time and ask that you respect ours.
+- **Contributing**: If you encounter an issue, we highly encourage you to submit a pull request or fork the project. We actively work to prevent contributor burnout to maintain the quality and continuity of Open WebUI.
+- **Bug reproducibility**: If a bug cannot be reproduced with a `:main` or `:dev` Docker setup, or a pip install with Python 3.11, it may require additional help from the community. In such cases, we will move it to the "issues" Discussions section due to our limited resources. We encourage the community to assist with these issues. Remember, it’s not that the issue doesn’t exist; we need your help!
+Note: Please remove the notes above when submitting your post. Thank you for your understanding and support!
+---
+## Installation Method
+[Describe the method you used to install the project, e.g., git clone, Docker, pip, etc.]
+## Environment
+- **Open WebUI Version:** [e.g., v0.3.11]
+- **Ollama (if applicable):** [e.g., v0.2.0, v0.1.32-rc1]
+- **Operating System:** [e.g., Windows 10, macOS Big Sur, Ubuntu 20.04]
+- **Browser (if applicable):** [e.g., Chrome 100.0, Firefox 98.0]
+**Confirmation:**
+- [ ] I have read and followed all the instructions provided in the README.md.
+- [ ] I am on the latest version of both Open WebUI and Ollama.
+- [ ] I have included the browser console logs.
+- [ ] I have included the Docker container logs.
+- [ ] I have provided the exact steps to reproduce the bug in the "Steps to Reproduce" section below.
+## Expected Behavior:
+[Describe what you expected to happen.]
+## Actual Behavior:
+[Describe what actually happened.]
+## Description
+**Bug Summary:**
+[Provide a brief but clear summary of the bug]
+## Reproduction Details
+**Steps to Reproduce:**
+[Outline the steps to reproduce the bug. Be as detailed as possible.]
+## Logs and Screenshots
+**Browser Console Logs:**
+[Include relevant browser console logs, if applicable]
+**Docker Container Logs:**
+[Include relevant Docker container logs, if applicable]
+**Screenshots/Screen Recordings (if applicable):**
+[Attach any relevant screenshots to help illustrate the issue]
+## Additional Information
+[Include any additional details that may help in understanding and reproducing the issue. This could include specific configurations, error messages, or anything else relevant to the bug.]
+## Note
+If the bug report is incomplete or does not follow the provided instructions, it may not be addressed. Please ensure that you have followed the steps outlined in the README.md and troubleshooting.md documents, and provide all necessary information for us to reproduce and address the issue. Thank you!

.github/ISSUE_TEMPLATE/feature_request.md ADDED Viewed

	@@ -0,0 +1,35 @@

+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+---
+# Feature Request
+## Important Notes
+- **Before submitting a report**: Please check the Issues or Discussions section to see if a similar issue or feature request has already been posted. It's likely we're already tracking it! If you’re unsure, start a discussion post first. This will help us efficiently focus on improving the project.
+- **Collaborate respectfully**: We value a constructive attitude, so please be mindful of your communication. If negativity is part of your approach, our capacity to engage may be limited. We’re here to help if you’re open to learning and communicating positively. Remember, Open WebUI is a volunteer-driven project managed by a single maintainer and supported by contributors who also have full-time jobs. We appreciate your time and ask that you respect ours.
+- **Contributing**: If you encounter an issue, we highly encourage you to submit a pull request or fork the project. We actively work to prevent contributor burnout to maintain the quality and continuity of Open WebUI.
+- **Bug reproducibility**: If a bug cannot be reproduced with a `:main` or `:dev` Docker setup, or a pip install with Python 3.11, it may require additional help from the community. In such cases, we will move it to the "issues" Discussions section due to our limited resources. We encourage the community to assist with these issues. Remember, it’s not that the issue doesn’t exist; we need your help!
+Note: Please remove the notes above when submitting your post. Thank you for your understanding and support!
+---
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+**Additional context**
+Add any other context or screenshots about the feature request here.

.github/dependabot.yml ADDED Viewed

	@@ -0,0 +1,12 @@

+version: 2
+updates:
+  - package-ecosystem: pip
+    directory: '/backend'
+    schedule:
+      interval: monthly
+    target-branch: 'dev'
+  - package-ecosystem: 'github-actions'
+    directory: '/'
+    schedule:
+      # Check for updates to GitHub Actions every week
+      interval: monthly

.github/pull_request_template.md ADDED Viewed

	@@ -0,0 +1,72 @@

+# Pull Request Checklist
+### Note to first-time contributors: Please open a discussion post in [Discussions](https://github.com/open-webui/open-webui/discussions) and describe your changes before submitting a pull request.
+**Before submitting, make sure you've checked the following:**
+- [ ] **Target branch:** Please verify that the pull request targets the `dev` branch.
+- [ ] **Description:** Provide a concise description of the changes made in this pull request.
+- [ ] **Changelog:** Ensure a changelog entry following the format of [Keep a Changelog](https://keepachangelog.com/) is added at the bottom of the PR description.
+- [ ] **Documentation:** Have you updated relevant documentation [Open WebUI Docs](https://github.com/open-webui/docs), or other documentation sources?
+- [ ] **Dependencies:** Are there any new dependencies? Have you updated the dependency versions in the documentation?
+- [ ] **Testing:** Have you written and run sufficient tests for validating the changes?
+- [ ] **Code review:** Have you performed a self-review of your code, addressing any coding standard issues and ensuring adherence to the project's coding standards?
+- [ ] **Prefix:** To cleary categorize this pull request, prefix the pull request title, using one of the following:
+  - **BREAKING CHANGE**: Significant changes that may affect compatibility
+  - **build**: Changes that affect the build system or external dependencies
+  - **ci**: Changes to our continuous integration processes or workflows
+  - **chore**: Refactor, cleanup, or other non-functional code changes
+  - **docs**: Documentation update or addition
+  - **feat**: Introduces a new feature or enhancement to the codebase
+  - **fix**: Bug fix or error correction
+  - **i18n**: Internationalization or localization changes
+  - **perf**: Performance improvement
+  - **refactor**: Code restructuring for better maintainability, readability, or scalability
+  - **style**: Changes that do not affect the meaning of the code (white-space, formatting, missing semi-colons, etc.)
+  - **test**: Adding missing tests or correcting existing tests
+  - **WIP**: Work in progress, a temporary label for incomplete or ongoing work
+# Changelog Entry
+### Description
+- [Concisely describe the changes made in this pull request, including any relevant motivation and impact (e.g., fixing a bug, adding a feature, or improving performance)]
+### Added
+- [List any new features, functionalities, or additions]
+### Changed
+- [List any changes, updates, refactorings, or optimizations]
+### Deprecated
+- [List any deprecated functionality or features that have been removed]
+### Removed
+- [List any removed features, files, or functionalities]
+### Fixed
+- [List any fixes, corrections, or bug fixes]
+### Security
+- [List any new or updated security-related changes, including vulnerability fixes]
+### Breaking Changes
+- **BREAKING CHANGE**: [List any breaking changes affecting compatibility or functionality]
+---
+### Additional Information
+- [Insert any additional context, notes, or explanations for the changes]
+  - [Reference any related issues, commits, or other relevant information]
+### Screenshots or Videos
+- [Attach any relevant screenshots or videos demonstrating the changes]

.github/workflows/build-release.yml ADDED Viewed

	@@ -0,0 +1,72 @@

+name: Release
+on:
+  push:
+    branches:
+      - main # or whatever branch you want to use
+jobs:
+  release:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Check for changes in package.json
+        run: |
+          git diff --cached --diff-filter=d package.json || {
+            echo "No changes to package.json"
+            exit 1
+          }
+      - name: Get version number from package.json
+        id: get_version
+        run: |
+          VERSION=$(jq -r '.version' package.json)
+          echo "::set-output name=version::$VERSION"
+      - name: Extract latest CHANGELOG entry
+        id: changelog
+        run: |
+          CHANGELOG_CONTENT=$(awk 'BEGIN {print_section=0;} /^## \[/ {if (print_section == 0) {print_section=1;} else {exit;}} print_section {print;}' CHANGELOG.md)
+          CHANGELOG_ESCAPED=$(echo "$CHANGELOG_CONTENT" | sed ':a;N;$!ba;s/\n/%0A/g')
+          echo "Extracted latest release notes from CHANGELOG.md:"
+          echo -e "$CHANGELOG_CONTENT"
+          echo "::set-output name=content::$CHANGELOG_ESCAPED"
+      - name: Create GitHub release
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const changelog = `${{ steps.changelog.outputs.content }}`;
+            const release = await github.rest.repos.createRelease({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              tag_name: `v${{ steps.get_version.outputs.version }}`,
+              name: `v${{ steps.get_version.outputs.version }}`,
+              body: changelog,
+            })
+            console.log(`Created release ${release.data.html_url}`)
+      - name: Upload package to GitHub release
+        uses: actions/upload-artifact@v4
+        with:
+          name: package
+          path: |
+            .
+            !.git
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Trigger Docker build workflow
+        uses: actions/github-script@v7
+        with:
+          script: |
+            github.rest.actions.createWorkflowDispatch({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              workflow_id: 'docker-build.yaml',
+              ref: 'v${{ steps.get_version.outputs.version }}',
+            })

.github/workflows/deploy-to-hf-spaces.yml ADDED Viewed

	@@ -0,0 +1,63 @@

+name: Deploy to HuggingFace Spaces
+on:
+  push:
+    branches:
+      - dev
+      - main
+  workflow_dispatch:
+jobs:
+  check-secret:
+    runs-on: ubuntu-latest
+    outputs:
+      token-set: ${{ steps.check-key.outputs.defined }}
+    steps:
+      - id: check-key
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        if: "${{ env.HF_TOKEN != '' }}"
+        run: echo "defined=true" >> $GITHUB_OUTPUT
+  deploy:
+    runs-on: ubuntu-latest
+    needs: [check-secret]
+    if: needs.check-secret.outputs.token-set == 'true'
+    env:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          lfs: true
+      - name: Remove git history
+        run: rm -rf .git
+      - name: Prepend YAML front matter to README.md
+        run: |
+          echo "---" > temp_readme.md
+          echo "title: Open WebUI" >> temp_readme.md
+          echo "emoji: 🐳" >> temp_readme.md
+          echo "colorFrom: purple" >> temp_readme.md
+          echo "colorTo: gray" >> temp_readme.md
+          echo "sdk: docker" >> temp_readme.md
+          echo "app_port: 8080" >> temp_readme.md
+          echo "---" >> temp_readme.md
+          cat README.md >> temp_readme.md
+          mv temp_readme.md README.md
+      - name: Configure git
+        run: |
+          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git config --global user.name "github-actions[bot]"
+      - name: Set up Git and push to Space
+        run: |
+          git init --initial-branch=main
+          git lfs install
+          git lfs track "*.ttf"
+          git lfs track "*.jpg"
+          rm demo.gif
+          git add .
+          git commit -m "GitHub deploy: ${{ github.sha }}"
+          git push --force https://arcticaurora:${HF_TOKEN}@huggingface.co/spaces/arcticaurora/ai main

.github/workflows/docker-build.yaml ADDED Viewed

	@@ -0,0 +1,477 @@

+name: Create and publish Docker images with specific build args
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+      - dev
+    tags:
+      - v*
+env:
+  REGISTRY: ghcr.io
+jobs:
+  build-main-image:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - linux/amd64
+          - linux/arm64
+    steps:
+      # GitHub Packages requires the entire repository name to be in lowercase
+      # although the repository owner has a lowercase username, this prevents some people from running actions after forking
+      - name: Set repository and image name to lowercase
+        run: |
+          echo "IMAGE_NAME=${IMAGE_NAME,,}" >>${GITHUB_ENV}
+          echo "FULL_IMAGE_NAME=ghcr.io/${IMAGE_NAME,,}" >>${GITHUB_ENV}
+        env:
+          IMAGE_NAME: '${{ github.repository }}'
+      - name: Prepare
+        run: |
+          platform=${{ matrix.platform }}
+          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata for Docker images (default latest tag)
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FULL_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=tag
+            type=sha,prefix=git-
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+          flavor: |
+            latest=${{ github.ref == 'refs/heads/main' }}
+      - name: Extract metadata for Docker cache
+        id: cache-meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FULL_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            ${{ github.ref_type == 'tag' && 'type=raw,value=main' || '' }}
+          flavor: |
+            prefix=cache-${{ matrix.platform }}-
+            latest=false
+      - name: Build Docker image (latest)
+        uses: docker/build-push-action@v5
+        id: build
+        with:
+          context: .
+          push: true
+          platforms: ${{ matrix.platform }}
+          labels: ${{ steps.meta.outputs.labels }}
+          outputs: type=image,name=${{ env.FULL_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
+          cache-from: type=registry,ref=${{ steps.cache-meta.outputs.tags }}
+          cache-to: type=registry,ref=${{ steps.cache-meta.outputs.tags }},mode=max
+          build-args: |
+            BUILD_HASH=${{ github.sha }}
+      - name: Export digest
+        run: |
+          mkdir -p /tmp/digests
+          digest="${{ steps.build.outputs.digest }}"
+          touch "/tmp/digests/${digest#sha256:}"
+      - name: Upload digest
+        uses: actions/upload-artifact@v4
+        with:
+          name: digests-main-${{ env.PLATFORM_PAIR }}
+          path: /tmp/digests/*
+          if-no-files-found: error
+          retention-days: 1
+  build-cuda-image:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - linux/amd64
+          - linux/arm64
+    steps:
+      # GitHub Packages requires the entire repository name to be in lowercase
+      # although the repository owner has a lowercase username, this prevents some people from running actions after forking
+      - name: Set repository and image name to lowercase
+        run: |
+          echo "IMAGE_NAME=${IMAGE_NAME,,}" >>${GITHUB_ENV}
+          echo "FULL_IMAGE_NAME=ghcr.io/${IMAGE_NAME,,}" >>${GITHUB_ENV}
+        env:
+          IMAGE_NAME: '${{ github.repository }}'
+      - name: Prepare
+        run: |
+          platform=${{ matrix.platform }}
+          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata for Docker images (cuda tag)
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FULL_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=tag
+            type=sha,prefix=git-
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=cuda
+          flavor: |
+            latest=${{ github.ref == 'refs/heads/main' }}
+            suffix=-cuda,onlatest=true
+      - name: Extract metadata for Docker cache
+        id: cache-meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FULL_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            ${{ github.ref_type == 'tag' && 'type=raw,value=main' || '' }}
+          flavor: |
+            prefix=cache-cuda-${{ matrix.platform }}-
+            latest=false
+      - name: Build Docker image (cuda)
+        uses: docker/build-push-action@v5
+        id: build
+        with:
+          context: .
+          push: true
+          platforms: ${{ matrix.platform }}
+          labels: ${{ steps.meta.outputs.labels }}
+          outputs: type=image,name=${{ env.FULL_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
+          cache-from: type=registry,ref=${{ steps.cache-meta.outputs.tags }}
+          cache-to: type=registry,ref=${{ steps.cache-meta.outputs.tags }},mode=max
+          build-args: |
+            BUILD_HASH=${{ github.sha }}
+            USE_CUDA=true
+      - name: Export digest
+        run: |
+          mkdir -p /tmp/digests
+          digest="${{ steps.build.outputs.digest }}"
+          touch "/tmp/digests/${digest#sha256:}"
+      - name: Upload digest
+        uses: actions/upload-artifact@v4
+        with:
+          name: digests-cuda-${{ env.PLATFORM_PAIR }}
+          path: /tmp/digests/*
+          if-no-files-found: error
+          retention-days: 1
+  build-ollama-image:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - linux/amd64
+          - linux/arm64
+    steps:
+      # GitHub Packages requires the entire repository name to be in lowercase
+      # although the repository owner has a lowercase username, this prevents some people from running actions after forking
+      - name: Set repository and image name to lowercase
+        run: |
+          echo "IMAGE_NAME=${IMAGE_NAME,,}" >>${GITHUB_ENV}
+          echo "FULL_IMAGE_NAME=ghcr.io/${IMAGE_NAME,,}" >>${GITHUB_ENV}
+        env:
+          IMAGE_NAME: '${{ github.repository }}'
+      - name: Prepare
+        run: |
+          platform=${{ matrix.platform }}
+          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata for Docker images (ollama tag)
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FULL_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=tag
+            type=sha,prefix=git-
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=ollama
+          flavor: |
+            latest=${{ github.ref == 'refs/heads/main' }}
+            suffix=-ollama,onlatest=true
+      - name: Extract metadata for Docker cache
+        id: cache-meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FULL_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            ${{ github.ref_type == 'tag' && 'type=raw,value=main' || '' }}
+          flavor: |
+            prefix=cache-ollama-${{ matrix.platform }}-
+            latest=false
+      - name: Build Docker image (ollama)
+        uses: docker/build-push-action@v5
+        id: build
+        with:
+          context: .
+          push: true
+          platforms: ${{ matrix.platform }}
+          labels: ${{ steps.meta.outputs.labels }}
+          outputs: type=image,name=${{ env.FULL_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
+          cache-from: type=registry,ref=${{ steps.cache-meta.outputs.tags }}
+          cache-to: type=registry,ref=${{ steps.cache-meta.outputs.tags }},mode=max
+          build-args: |
+            BUILD_HASH=${{ github.sha }}
+            USE_OLLAMA=true
+      - name: Export digest
+        run: |
+          mkdir -p /tmp/digests
+          digest="${{ steps.build.outputs.digest }}"
+          touch "/tmp/digests/${digest#sha256:}"
+      - name: Upload digest
+        uses: actions/upload-artifact@v4
+        with:
+          name: digests-ollama-${{ env.PLATFORM_PAIR }}
+          path: /tmp/digests/*
+          if-no-files-found: error
+          retention-days: 1
+  merge-main-images:
+    runs-on: ubuntu-latest
+    needs: [build-main-image]
+    steps:
+      # GitHub Packages requires the entire repository name to be in lowercase
+      # although the repository owner has a lowercase username, this prevents some people from running actions after forking
+      - name: Set repository and image name to lowercase
+        run: |
+          echo "IMAGE_NAME=${IMAGE_NAME,,}" >>${GITHUB_ENV}
+          echo "FULL_IMAGE_NAME=ghcr.io/${IMAGE_NAME,,}" >>${GITHUB_ENV}
+        env:
+          IMAGE_NAME: '${{ github.repository }}'
+      - name: Download digests
+        uses: actions/download-artifact@v4
+        with:
+          pattern: digests-main-*
+          path: /tmp/digests
+          merge-multiple: true
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata for Docker images (default latest tag)
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FULL_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=tag
+            type=sha,prefix=git-
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+          flavor: |
+            latest=${{ github.ref == 'refs/heads/main' }}
+      - name: Create manifest list and push
+        working-directory: /tmp/digests
+        run: |
+          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
+            $(printf '${{ env.FULL_IMAGE_NAME }}@sha256:%s ' *)
+      - name: Inspect image
+        run: |
+          docker buildx imagetools inspect ${{ env.FULL_IMAGE_NAME }}:${{ steps.meta.outputs.version }}
+  merge-cuda-images:
+    runs-on: ubuntu-latest
+    needs: [build-cuda-image]
+    steps:
+      # GitHub Packages requires the entire repository name to be in lowercase
+      # although the repository owner has a lowercase username, this prevents some people from running actions after forking
+      - name: Set repository and image name to lowercase
+        run: |
+          echo "IMAGE_NAME=${IMAGE_NAME,,}" >>${GITHUB_ENV}
+          echo "FULL_IMAGE_NAME=ghcr.io/${IMAGE_NAME,,}" >>${GITHUB_ENV}
+        env:
+          IMAGE_NAME: '${{ github.repository }}'
+      - name: Download digests
+        uses: actions/download-artifact@v4
+        with:
+          pattern: digests-cuda-*
+          path: /tmp/digests
+          merge-multiple: true
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata for Docker images (default latest tag)
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FULL_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=tag
+            type=sha,prefix=git-
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=cuda
+          flavor: |
+            latest=${{ github.ref == 'refs/heads/main' }}
+            suffix=-cuda,onlatest=true
+      - name: Create manifest list and push
+        working-directory: /tmp/digests
+        run: |
+          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
+            $(printf '${{ env.FULL_IMAGE_NAME }}@sha256:%s ' *)
+      - name: Inspect image
+        run: |
+          docker buildx imagetools inspect ${{ env.FULL_IMAGE_NAME }}:${{ steps.meta.outputs.version }}
+  merge-ollama-images:
+    runs-on: ubuntu-latest
+    needs: [build-ollama-image]
+    steps:
+      # GitHub Packages requires the entire repository name to be in lowercase
+      # although the repository owner has a lowercase username, this prevents some people from running actions after forking
+      - name: Set repository and image name to lowercase
+        run: |
+          echo "IMAGE_NAME=${IMAGE_NAME,,}" >>${GITHUB_ENV}
+          echo "FULL_IMAGE_NAME=ghcr.io/${IMAGE_NAME,,}" >>${GITHUB_ENV}
+        env:
+          IMAGE_NAME: '${{ github.repository }}'
+      - name: Download digests
+        uses: actions/download-artifact@v4
+        with:
+          pattern: digests-ollama-*
+          path: /tmp/digests
+          merge-multiple: true
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata for Docker images (default ollama tag)
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FULL_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=tag
+            type=sha,prefix=git-
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=ollama
+          flavor: |
+            latest=${{ github.ref == 'refs/heads/main' }}
+            suffix=-ollama,onlatest=true
+      - name: Create manifest list and push
+        working-directory: /tmp/digests
+        run: |
+          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
+            $(printf '${{ env.FULL_IMAGE_NAME }}@sha256:%s ' *)
+      - name: Inspect image
+        run: |
+          docker buildx imagetools inspect ${{ env.FULL_IMAGE_NAME }}:${{ steps.meta.outputs.version }}

.github/workflows/format-backend.yaml ADDED Viewed

	@@ -0,0 +1,39 @@

+name: Python CI
+on:
+  push:
+    branches:
+      - main
+      - dev
+  pull_request:
+    branches:
+      - main
+      - dev
+jobs:
+  build:
+    name: 'Format Backend'
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [3.11]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install black
+      - name: Format backend
+        run: npm run format:backend
+      - name: Check for changes after format
+        run: git diff --exit-code

.github/workflows/format-build-frontend.yaml ADDED Viewed

	@@ -0,0 +1,57 @@

+name: Frontend Build
+on:
+  push:
+    branches:
+      - main
+      - dev
+  pull_request:
+    branches:
+      - main
+      - dev
+jobs:
+  build:
+    name: 'Format & Build Frontend'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '22' # Or specify any other version you want to use
+      - name: Install Dependencies
+        run: npm install
+      - name: Format Frontend
+        run: npm run format
+      - name: Run i18next
+        run: npm run i18n:parse
+      - name: Check for Changes After Format
+        run: git diff --exit-code
+      - name: Build Frontend
+        run: npm run build
+  test-frontend:
+    name: 'Frontend Unit Tests'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+      - name: Install Dependencies
+        run: npm ci
+      - name: Run vitest
+        run: npm run test:frontend

.github/workflows/integration-test.yml ADDED Viewed

	@@ -0,0 +1,253 @@

+name: Integration Test
+on:
+  push:
+    branches:
+      - main
+      - dev
+  pull_request:
+    branches:
+      - main
+      - dev
+jobs:
+  cypress-run:
+    name: Run Cypress Integration Tests
+    runs-on: ubuntu-latest
+    steps:
+      - name: Maximize build space
+        uses: AdityaGarg8/remove-unwanted-software@v4.1
+        with:
+          remove-android: 'true'
+          remove-haskell: 'true'
+          remove-codeql: 'true'
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+      - name: Build and run Compose Stack
+        run: |
+          docker compose \
+            --file docker-compose.yaml \
+            --file docker-compose.api.yaml \
+            --file docker-compose.a1111-test.yaml \
+            up --detach --build
+      - name: Delete Docker build cache
+        run: |
+          docker builder prune --all --force
+      - name: Wait for Ollama to be up
+        timeout-minutes: 5
+        run: |
+          until curl --output /dev/null --silent --fail http://localhost:11434; do
+            printf '.'
+            sleep 1
+          done
+          echo "Service is up!"
+      - name: Preload Ollama model
+        run: |
+          docker exec ollama ollama pull qwen:0.5b-chat-v1.5-q2_K
+      - name: Cypress run
+        uses: cypress-io/github-action@v6
+        with:
+          browser: chrome
+          wait-on: 'http://localhost:3000'
+          config: baseUrl=http://localhost:3000
+      - uses: actions/upload-artifact@v4
+        if: always()
+        name: Upload Cypress videos
+        with:
+          name: cypress-videos
+          path: cypress/videos
+          if-no-files-found: ignore
+      - name: Extract Compose logs
+        if: always()
+        run: |
+          docker compose logs > compose-logs.txt
+      - uses: actions/upload-artifact@v4
+        if: always()
+        name: Upload Compose logs
+        with:
+          name: compose-logs
+          path: compose-logs.txt
+          if-no-files-found: ignore
+  # pytest:
+  #   name: Run Backend Tests
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #     - uses: actions/checkout@v4
+  #     - name: Set up Python
+  #       uses: actions/setup-python@v5
+  #       with:
+  #         python-version: ${{ matrix.python-version }}
+  #     - name: Install dependencies
+  #       run: |
+  #         python -m pip install --upgrade pip
+  #         pip install -r backend/requirements.txt
+  #     - name: pytest run
+  #       run: |
+  #         ls -al
+  #         cd backend
+  #         PYTHONPATH=. pytest . -o log_cli=true -o log_cli_level=INFO
+  migration_test:
+    name: Run Migration Tests
+    runs-on: ubuntu-latest
+    services:
+      postgres:
+        image: postgres
+        env:
+          POSTGRES_PASSWORD: postgres
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+        ports:
+          - 5432:5432
+    #      mysql:
+    #        image: mysql
+    #        env:
+    #          MYSQL_ROOT_PASSWORD: mysql
+    #          MYSQL_DATABASE: mysql
+    #        options: >-
+    #          --health-cmd "mysqladmin ping -h localhost"
+    #          --health-interval 10s
+    #          --health-timeout 5s
+    #          --health-retries 5
+    #        ports:
+    #          - 3306:3306
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Set up uv
+        uses: yezz123/setup-uv@v4
+        with:
+          uv-venv: venv
+      - name: Activate virtualenv
+        run: |
+          . venv/bin/activate
+          echo PATH=$PATH >> $GITHUB_ENV
+      - name: Install dependencies
+        run: |
+          uv pip install -r backend/requirements.txt
+      - name: Test backend with SQLite
+        id: sqlite
+        env:
+          WEBUI_SECRET_KEY: secret-key
+          GLOBAL_LOG_LEVEL: debug
+        run: |
+          cd backend
+          uvicorn open_webui.main:app --port "8080" --forwarded-allow-ips '*' &
+          UVICORN_PID=$!
+          # Wait up to 40 seconds for the server to start
+          for i in {1..40}; do
+              curl -s http://localhost:8080/api/config > /dev/null && break
+              sleep 1
+              if [ $i -eq 40 ]; then
+                  echo "Server failed to start"
+                  kill -9 $UVICORN_PID
+                  exit 1
+              fi
+          done
+          # Check that the server is still running after 5 seconds
+          sleep 5
+          if ! kill -0 $UVICORN_PID; then
+              echo "Server has stopped"
+              exit 1
+          fi
+      - name: Test backend with Postgres
+        if: success() || steps.sqlite.conclusion == 'failure'
+        env:
+          WEBUI_SECRET_KEY: secret-key
+          GLOBAL_LOG_LEVEL: debug
+          DATABASE_URL: postgresql://postgres:postgres@localhost:5432/postgres
+          DATABASE_POOL_SIZE: 10
+          DATABASE_POOL_MAX_OVERFLOW: 10
+          DATABASE_POOL_TIMEOUT: 30
+        run: |
+          cd backend
+          uvicorn open_webui.main:app --port "8081" --forwarded-allow-ips '*' &
+          UVICORN_PID=$!
+          # Wait up to 20 seconds for the server to start
+          for i in {1..20}; do
+              curl -s http://localhost:8081/api/config > /dev/null && break
+              sleep 1
+              if [ $i -eq 20 ]; then
+                  echo "Server failed to start"
+                  kill -9 $UVICORN_PID
+                  exit 1
+              fi
+          done
+          # Check that the server is still running after 5 seconds
+          sleep 5
+          if ! kill -0 $UVICORN_PID; then
+              echo "Server has stopped"
+              exit 1
+          fi
+          # Check that service will reconnect to postgres when connection will be closed
+          status_code=$(curl --write-out %{http_code} -s --output /dev/null http://localhost:8081/health/db)
+          if [[ "$status_code" -ne 200 ]] ; then
+            echo "Server has failed before postgres reconnect check"
+            exit 1
+          fi
+          echo "Terminating all connections to postgres..."
+          python -c "import os, psycopg2 as pg2; \
+            conn = pg2.connect(dsn=os.environ['DATABASE_URL'].replace('+pool', '')); \
+            cur = conn.cursor(); \
+            cur.execute('SELECT pg_terminate_backend(psa.pid) FROM pg_stat_activity psa WHERE datname = current_database() AND pid <> pg_backend_pid();')"
+          status_code=$(curl --write-out %{http_code} -s --output /dev/null http://localhost:8081/health/db)
+          if [[ "$status_code" -ne 200 ]] ; then
+            echo "Server has not reconnected to postgres after connection was closed: returned status $status_code"
+            exit 1
+          fi
+#      - name: Test backend with MySQL
+#        if: success() || steps.sqlite.conclusion == 'failure' || steps.postgres.conclusion == 'failure'
+#        env:
+#          WEBUI_SECRET_KEY: secret-key
+#          GLOBAL_LOG_LEVEL: debug
+#          DATABASE_URL: mysql://root:mysql@localhost:3306/mysql
+#        run: |
+#          cd backend
+#          uvicorn open_webui.main:app --port "8083" --forwarded-allow-ips '*' &
+#          UVICORN_PID=$!
+#          # Wait up to 20 seconds for the server to start
+#          for i in {1..20}; do
+#              curl -s http://localhost:8083/api/config > /dev/null && break
+#              sleep 1
+#              if [ $i -eq 20 ]; then
+#                  echo "Server failed to start"
+#                  kill -9 $UVICORN_PID
+#                  exit 1
+#              fi
+#          done
+#          # Check that the server is still running after 5 seconds
+#          sleep 5
+#          if ! kill -0 $UVICORN_PID; then
+#              echo "Server has stopped"
+#              exit 1
+#          fi

.github/workflows/lint-backend.disabled ADDED Viewed

	@@ -0,0 +1,27 @@

+name: Python CI
+on:
+  push:
+    branches: ['main']
+  pull_request:
+jobs:
+  build:
+    name: 'Lint Backend'
+    env:
+      PUBLIC_API_BASE_URL: ''
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        node-version:
+          - latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Use Python
+        uses: actions/setup-python@v5
+      - name: Use Bun
+        uses: oven-sh/setup-bun@v1
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pylint
+      - name: Lint backend
+        run: bun run lint:backend

.github/workflows/lint-frontend.disabled ADDED Viewed

	@@ -0,0 +1,21 @@

+name: Bun CI
+on:
+  push:
+    branches: ['main']
+  pull_request:
+jobs:
+  build:
+    name: 'Lint Frontend'
+    env:
+      PUBLIC_API_BASE_URL: ''
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Use Bun
+        uses: oven-sh/setup-bun@v1
+      - run: bun --version
+      - name: Install frontend dependencies
+        run: bun install --frozen-lockfile
+      - run: bun run lint:frontend
+      - run: bun run lint:types
+        if: success() || failure()

.github/workflows/release-pypi.yml ADDED Viewed

	@@ -0,0 +1,32 @@

+name: Release to PyPI
+on:
+  push:
+    branches:
+      - main # or whatever branch you want to use
+      - pypi-release
+jobs:
+  release:
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/open-webui
+    permissions:
+      id-token: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 18
+      - uses: actions/setup-python@v5
+        with:
+          python-version: 3.11
+      - name: Build
+        run: |
+          python -m pip install --upgrade pip
+          pip install build
+          python -m build .
+      - name: Publish package distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1

.gitignore ADDED Viewed

	@@ -0,0 +1,310 @@

+.DS_Store
+node_modules
+/build
+/.svelte-kit
+/package
+.myenv/
+.env
+.env.*
+!.env.example
+vite.config.js.timestamp-*
+vite.config.ts.timestamp-*
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Pyodide distribution
+static/pyodide/*
+!static/pyodide/pyodide-lock.json
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+lerna-debug.log*
+.pnpm-debug.log*
+# Diagnostic reports (https://nodejs.org/api/report.html)
+report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+# Coverage directory used by tools like istanbul
+coverage
+*.lcov
+# nyc test coverage
+.nyc_output
+# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+# Bower dependency directory (https://bower.io/)
+bower_components
+# node-waf configuration
+.lock-wscript
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+# Dependency directories
+node_modules/
+jspm_packages/
+# Snowpack dependency directory (https://snowpack.dev/)
+web_modules/
+# TypeScript cache
+*.tsbuildinfo
+# Optional npm cache directory
+.npm
+# Optional eslint cache
+.eslintcache
+# Optional stylelint cache
+.stylelintcache
+# Microbundle cache
+.rpt2_cache/
+.rts2_cache_cjs/
+.rts2_cache_es/
+.rts2_cache_umd/
+# Optional REPL history
+.node_repl_history
+# Output of 'npm pack'
+*.tgz
+# Yarn Integrity file
+.yarn-integrity
+# dotenv environment variable files
+.env
+.env.development.local
+.env.test.local
+.env.production.local
+.env.local
+# parcel-bundler cache (https://parceljs.org/)
+.cache
+.parcel-cache
+# Next.js build output
+.next
+out
+# Nuxt.js build / generate output
+.nuxt
+dist
+# Gatsby files
+.cache/
+# Comment in the public line in if your project uses Gatsby and not Next.js
+# https://nextjs.org/blog/next-9-1#public-directory-support
+# public
+# vuepress build output
+.vuepress/dist
+# vuepress v2.x temp and cache directory
+.temp
+.cache
+# Docusaurus cache and generated files
+.docusaurus
+# Serverless directories
+.serverless/
+# FuseBox cache
+.fusebox/
+# DynamoDB Local files
+.dynamodb/
+# TernJS port file
+.tern-port
+# Stores VSCode versions used for testing VSCode extensions
+.vscode-test
+# yarn v2
+.yarn/cache
+.yarn/unplugged
+.yarn/build-state.yml
+.yarn/install-state.gz
+.pnp.*
+# cypress artifacts
+cypress/videos
+cypress/screenshots
+.vscode/settings.json

.npmrc ADDED Viewed

	@@ -0,0 +1 @@


1	+ engine-strict=true

.prettierignore ADDED Viewed

	@@ -0,0 +1,316 @@

+# Ignore files for PNPM, NPM and YARN
+pnpm-lock.yaml
+package-lock.json
+yarn.lock
+kubernetes/
+# Copy of .gitignore
+.DS_Store
+node_modules
+/build
+/.svelte-kit
+/package
+.env
+.env.*
+!.env.example
+vite.config.js.timestamp-*
+vite.config.ts.timestamp-*
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+lerna-debug.log*
+.pnpm-debug.log*
+# Diagnostic reports (https://nodejs.org/api/report.html)
+report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+# Coverage directory used by tools like istanbul
+coverage
+*.lcov
+# nyc test coverage
+.nyc_output
+# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+# Bower dependency directory (https://bower.io/)
+bower_components
+# node-waf configuration
+.lock-wscript
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+# Dependency directories
+node_modules/
+jspm_packages/
+# Snowpack dependency directory (https://snowpack.dev/)
+web_modules/
+# TypeScript cache
+*.tsbuildinfo
+# Optional npm cache directory
+.npm
+# Optional eslint cache
+.eslintcache
+# Optional stylelint cache
+.stylelintcache
+# Microbundle cache
+.rpt2_cache/
+.rts2_cache_cjs/
+.rts2_cache_es/
+.rts2_cache_umd/
+# Optional REPL history
+.node_repl_history
+# Output of 'npm pack'
+*.tgz
+# Yarn Integrity file
+.yarn-integrity
+# dotenv environment variable files
+.env
+.env.development.local
+.env.test.local
+.env.production.local
+.env.local
+# parcel-bundler cache (https://parceljs.org/)
+.cache
+.parcel-cache
+# Next.js build output
+.next
+out
+# Nuxt.js build / generate output
+.nuxt
+dist
+# Gatsby files
+.cache/
+# Comment in the public line in if your project uses Gatsby and not Next.js
+# https://nextjs.org/blog/next-9-1#public-directory-support
+# public
+# vuepress build output
+.vuepress/dist
+# vuepress v2.x temp and cache directory
+.temp
+.cache
+# Docusaurus cache and generated files
+.docusaurus
+# Serverless directories
+.serverless/
+# FuseBox cache
+.fusebox/
+# DynamoDB Local files
+.dynamodb/
+# TernJS port file
+.tern-port
+# Stores VSCode versions used for testing VSCode extensions
+.vscode-test
+# yarn v2
+.yarn/cache
+.yarn/unplugged
+.yarn/build-state.yml
+.yarn/install-state.gz
+.pnp.*
+# cypress artifacts
+cypress/videos
+cypress/screenshots
+/static/*

.prettierrc ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+	"useTabs": true,
+	"singleQuote": true,
+	"trailingComma": "none",
+	"printWidth": 100,
+	"plugins": ["prettier-plugin-svelte"],
+	"pluginSearchDirs": ["."],
+	"overrides": [{ "files": "*.svelte", "options": { "parser": "svelte" } }]
+}

CHANGELOG.md ADDED Viewed

The diff for this file is too large to render. See raw diff

CODE_OF_CONDUCT.md ADDED Viewed

	@@ -0,0 +1,99 @@

+# Contributor Covenant Code of Conduct
+## Our Pledge
+As members, contributors, and leaders of this community, we pledge to make participation in our open-source project a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
+We are committed to creating and maintaining an open, respectful, and professional environment where positive contributions and meaningful discussions can flourish. By participating in this project, you agree to uphold these values and align your behavior to the standards outlined in this Code of Conduct.
+## Why These Standards Are Important
+Open-source projects rely on a community of volunteers dedicating their time, expertise, and effort toward a shared goal. These projects are inherently collaborative but also fragile, as the success of the project depends on the goodwill, energy, and productivity of those involved.
+Maintaining a positive and respectful environment is essential to safeguarding the integrity of this project and protecting contributors' efforts. Behavior that disrupts this atmosphere—whether through hostility, entitlement, or unprofessional conduct—can severely harm the morale and productivity of the community. **Strict enforcement of these standards ensures a safe and supportive space for meaningful collaboration.**
+This is a community where **respect and professionalism are mandatory.** Violations of these standards will result in **zero tolerance** and immediate enforcement to prevent disruption and ensure the well-being of all participants.
+## Our Standards
+Examples of behavior that contribute to a positive and professional community include:
+- **Respecting others.** Be considerate, listen actively, and engage with empathy toward others' viewpoints and experiences.
+- **Constructive feedback.** Provide actionable, thoughtful, and respectful feedback that helps improve the project and encourages collaboration. Avoid unproductive negativity or hypercriticism.
+- **Recognizing volunteer contributions.** Appreciate that contributors dedicate their free time and resources selflessly. Approach them with gratitude and patience.
+- **Focusing on shared goals.** Collaborate in ways that prioritize the health, success, and sustainability of the community over individual agendas.
+Examples of unacceptable behavior include:
+- The use of discriminatory, demeaning, or sexualized language or behavior.
+- Personal attacks, derogatory comments, trolling, or inflammatory political or ideological arguments.
+- Harassment, intimidation, or any behavior intended to create a hostile, uncomfortable, or unsafe environment.
+- Publishing others' private information (e.g., physical or email addresses) without explicit permission.
+- **Entitlement, demand, or aggression toward contributors.** Volunteers are under no obligation to provide immediate or personalized support. Rude or dismissive behavior will not be tolerated.
+- **Unproductive or destructive behavior.** This includes venting frustration as hostility ("tantrums"), hypercriticism, attention-seeking negativity, or anything that distracts from the project's goals.
+- **Spamming and promotional exploitation.** Sharing irrelevant product promotions or self-promotion in the community is not allowed unless it directly contributes value to the discussion.
+### Feedback and Community Engagement
+- **Constructive feedback is encouraged, but hostile or entitled behavior will result in immediate action.** If you disagree with elements of the project, we encourage you to offer meaningful improvements or fork the project if necessary. Healthy discussions and technical disagreements are welcome only when handled with professionalism.
+- **Respect contributors' time and efforts.** No one is entitled to personalized or on-demand assistance. This is a community built on collaboration and shared effort; demanding or demeaning behavior undermines that trust and will not be allowed.
+### Zero Tolerance: No Warnings, Immediate Action
+This community operates under a **zero-tolerance policy.** Any behavior deemed unacceptable under this Code of Conduct will result in **immediate enforcement, without prior warning.**
+We employ this approach to ensure that unproductive or disruptive behavior does not escalate further or cause unnecessary harm to other contributors. The standards are clear, and violations of any kind—whether mild or severe—will be addressed decisively to protect the community.
+## Enforcement Responsibilities
+Community leaders are responsible for upholding and enforcing these standards. They are empowered to take **immediate and appropriate action** to address any behaviors they deem unacceptable under this Code of Conduct. These actions are taken with the goal of protecting the community and preserving its safe, positive, and productive environment.
+## Scope
+This Code of Conduct applies to all community spaces, including forums, repositories, social media accounts, and in-person events. It also applies when an individual represents the community in public settings, such as conferences or official communications.
+Additionally, any behavior outside of these defined spaces that negatively impacts the community or its members may fall within the scope of this Code of Conduct.
+## Reporting Violations
+Instances of unacceptable behavior can be reported to the leadership team at **hello@openwebui.com**. Reports will be handled promptly, confidentially, and with consideration for the safety and well-being of the reporter.
+All community leaders are required to uphold confidentiality and impartiality when addressing reports of violations.
+## Enforcement Guidelines
+### Ban
+**Community Impact**: Community leaders will issue a ban to any participant whose behavior is deemed unacceptable according to this Code of Conduct. Bans are enforced immediately and without prior notice.
+A ban may be temporary or permanent, depending on the severity of the violation. This includes—but is not limited to—behavior such as:
+- Harassment or abusive behavior toward contributors.
+- Persistent negativity or hostility that disrupts the collaborative environment.
+- Disrespectful, demanding, or aggressive interactions with others.
+- Attempts to cause harm or sabotage the community.
+**Consequence**: A banned individual is immediately removed from access to all community spaces, communication channels, and events. Community leaders reserve the right to enforce either a time-limited suspension or a permanent ban based on the specific circumstances of the violation.
+This approach ensures that disruptive behaviors are addressed swiftly and decisively in order to maintain the integrity and productivity of the community.
+## Why Zero Tolerance Is Necessary
+Open-source projects thrive on collaboration, goodwill, and mutual respect. Toxic behaviors—such as entitlement, hostility, or persistent negativity—threaten not just individual contributors but the health of the project as a whole. Allowing such behaviors to persist robs contributors of their time, energy, and enthusiasm for the work they do.
+By enforcing a zero-tolerance policy, we ensure that the community remains a safe, welcoming space for all participants. These measures are not about harshness—they are about protecting contributors and fostering a productive environment where innovation can thrive.
+Our expectations are clear, and our enforcement reflects our commitment to this project's long-term success.
+## Attribution
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at
+https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
+Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity).
+[homepage]: https://www.contributor-covenant.org
+For answers to common questions about this code of conduct, see the FAQ at
+https://www.contributor-covenant.org/faq. Translations are available at
+https://www.contributor-covenant.org/translations.

Caddyfile.localhost ADDED Viewed

	@@ -0,0 +1,64 @@

+# Run with
+#    caddy run --envfile ./example.env --config ./Caddyfile.localhost
+#
+# This is configured for
+#    - Automatic HTTPS (even for localhost)
+#    - Reverse Proxying to Ollama API Base URL (http://localhost:11434/api)
+#    - CORS
+#    - HTTP Basic Auth API Tokens (uncomment basicauth section)
+# CORS Preflight (OPTIONS) + Request (GET, POST, PATCH, PUT, DELETE)
+(cors-api) {
+	@match-cors-api-preflight method OPTIONS
+	handle @match-cors-api-preflight {
+		header {
+			Access-Control-Allow-Origin "{http.request.header.origin}"
+			Access-Control-Allow-Methods "GET, POST, PUT, PATCH, DELETE, OPTIONS"
+			Access-Control-Allow-Headers "Origin, Accept, Authorization, Content-Type, X-Requested-With"
+			Access-Control-Allow-Credentials "true"
+			Access-Control-Max-Age "3600"
+			defer
+		}
+		respond "" 204
+	}
+	@match-cors-api-request {
+		not {
+			header Origin "{http.request.scheme}://{http.request.host}"
+		}
+		header Origin "{http.request.header.origin}"
+	}
+	handle @match-cors-api-request {
+		header {
+			Access-Control-Allow-Origin "{http.request.header.origin}"
+			Access-Control-Allow-Methods "GET, POST, PUT, PATCH, DELETE, OPTIONS"
+			Access-Control-Allow-Headers "Origin, Accept, Authorization, Content-Type, X-Requested-With"
+			Access-Control-Allow-Credentials "true"
+			Access-Control-Max-Age "3600"
+			defer
+		}
+	}
+}
+# replace localhost with example.com or whatever
+localhost {
+	## HTTP Basic Auth
+	## (uncomment to enable)
+	# basicauth {
+	# 	# see .example.env for how to generate tokens
+	# 	{env.OLLAMA_API_ID} {env.OLLAMA_API_TOKEN_DIGEST}
+	# }
+	handle /api/* {
+		# Comment to disable CORS
+		import cors-api
+		reverse_proxy localhost:11434
+	}
+	# Same-Origin Static Web Server
+	file_server {
+		root ./build/
+	}
+}

Dockerfile ADDED Viewed

	@@ -0,0 +1,176 @@

+# syntax=docker/dockerfile:1
+# Initialize device type args
+# use build args in the docker build command with --build-arg="BUILDARG=true"
+ARG USE_CUDA=false
+ARG USE_OLLAMA=false
+# Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)
+ARG USE_CUDA_VER=cu121
+# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
+# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
+# for better performance and multilangauge support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
+# IMPORTANT: If you change the embedding model (sentence-transformers/all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them.
+ARG USE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
+ARG USE_RERANKING_MODEL=""
+# Tiktoken encoding name; models to use can be found at https://huggingface.co/models?library=tiktoken
+ARG USE_TIKTOKEN_ENCODING_NAME="cl100k_base"
+ARG BUILD_HASH=dev-build
+# Override at your own risk - non-root configurations are untested
+ARG UID=0
+ARG GID=0
+######## WebUI frontend ########
+FROM --platform=$BUILDPLATFORM node:22-alpine3.20 AS build
+ARG BUILD_HASH
+WORKDIR /app
+COPY package.json package-lock.json ./
+RUN npm ci
+COPY . .
+ENV APP_BUILD_HASH=${BUILD_HASH}
+RUN npm run build
+######## WebUI backend ########
+FROM python:3.11-slim-bookworm AS base
+# Use args
+ARG USE_CUDA
+ARG USE_OLLAMA
+ARG USE_CUDA_VER
+ARG USE_EMBEDDING_MODEL
+ARG USE_RERANKING_MODEL
+ARG UID
+ARG GID
+## Basis ##
+ENV ENV=prod \
+    PORT=8080 \
+    # pass build args to the build
+    USE_OLLAMA_DOCKER=${USE_OLLAMA} \
+    USE_CUDA_DOCKER=${USE_CUDA} \
+    USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \
+    USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL} \
+    USE_RERANKING_MODEL_DOCKER=${USE_RERANKING_MODEL}
+## Basis URL Config ##
+ENV OLLAMA_BASE_URL="/ollama" \
+    OPENAI_API_BASE_URL=""
+## API Key and Security Config ##
+ENV OPENAI_API_KEY="" \
+    WEBUI_SECRET_KEY="" \
+    SCARF_NO_ANALYTICS=true \
+    DO_NOT_TRACK=true \
+    ANONYMIZED_TELEMETRY=false
+#### Other models #########################################################
+## whisper TTS model settings ##
+ENV WHISPER_MODEL="base" \
+    WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
+## RAG Embedding model settings ##
+ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \
+    RAG_RERANKING_MODEL="$USE_RERANKING_MODEL_DOCKER" \
+    SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models"
+## Tiktoken model settings ##
+ENV TIKTOKEN_ENCODING_NAME="cl100k_base" \
+    TIKTOKEN_CACHE_DIR="/app/backend/data/cache/tiktoken"
+## Hugging Face download cache ##
+ENV HF_HOME="/app/backend/data/cache/embedding/models"
+## Torch Extensions ##
+# ENV TORCH_EXTENSIONS_DIR="/.cache/torch_extensions"
+#### Other models ##########################################################
+WORKDIR /app/backend
+ENV HOME=/root
+# Create user and group if not root
+RUN if [ $UID -ne 0 ]; then \
+    if [ $GID -ne 0 ]; then \
+    addgroup --gid $GID app; \
+    fi; \
+    adduser --uid $UID --gid $GID --home $HOME --disabled-password --no-create-home app; \
+    fi
+RUN mkdir -p $HOME/.cache/chroma
+RUN echo -n 00000000-0000-0000-0000-000000000000 > $HOME/.cache/chroma/telemetry_user_id
+# Make sure the user has access to the app and root directory
+RUN chown -R $UID:$GID /app $HOME
+RUN if [ "$USE_OLLAMA" = "true" ]; then \
+    apt-get update && \
+    # Install pandoc and netcat
+    apt-get install -y --no-install-recommends git build-essential pandoc netcat-openbsd curl && \
+    apt-get install -y --no-install-recommends gcc python3-dev && \
+    # for RAG OCR
+    apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
+    # install helper tools
+    apt-get install -y --no-install-recommends curl jq && \
+    # install ollama
+    curl -fsSL https://ollama.com/install.sh | sh && \
+    # cleanup
+    rm -rf /var/lib/apt/lists/*; \
+    else \
+    apt-get update && \
+    # Install pandoc, netcat and gcc
+    apt-get install -y --no-install-recommends git build-essential pandoc gcc netcat-openbsd curl jq && \
+    apt-get install -y --no-install-recommends gcc python3-dev && \
+    # for RAG OCR
+    apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
+    # cleanup
+    rm -rf /var/lib/apt/lists/*; \
+    fi
+# install python dependencies
+COPY --chown=$UID:$GID ./backend/requirements.txt ./requirements.txt
+RUN pip3 install uv && \
+    if [ "$USE_CUDA" = "true" ]; then \
+    # If you use CUDA the whisper and embedding model will be downloaded on first use
+    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
+    uv pip install --system -r requirements.txt --no-cache-dir && \
+    python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
+    python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
+    python -c "import os; import tiktoken; tiktoken.get_encoding(os.environ['TIKTOKEN_ENCODING_NAME'])"; \
+    else \
+    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
+    uv pip install --system -r requirements.txt --no-cache-dir && \
+    python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
+    python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
+    python -c "import os; import tiktoken; tiktoken.get_encoding(os.environ['TIKTOKEN_ENCODING_NAME'])"; \
+    fi; \
+    chown -R $UID:$GID /app/backend/data/
+# copy embedding weight from build
+# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
+# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
+# copy built frontend files
+COPY --chown=$UID:$GID --from=build /app/build /app/build
+COPY --chown=$UID:$GID --from=build /app/CHANGELOG.md /app/CHANGELOG.md
+COPY --chown=$UID:$GID --from=build /app/package.json /app/package.json
+# copy backend files
+COPY --chown=$UID:$GID ./backend .
+EXPOSE 8080
+HEALTHCHECK CMD curl --silent --fail http://localhost:${PORT:-8080}/health | jq -ne 'input.status == true' || exit 1
+USER $UID:$GID
+ARG BUILD_HASH
+ENV WEBUI_BUILD_VERSION=${BUILD_HASH}
+ENV DOCKER=true
+CMD [ "bash", "start.sh"]

INSTALLATION.md ADDED Viewed

	@@ -0,0 +1,35 @@

+### Installing Both Ollama and Open WebUI Using Kustomize
+For cpu-only pod
+```bash
+kubectl apply -f ./kubernetes/manifest/base
+```
+For gpu-enabled pod
+```bash
+kubectl apply -k ./kubernetes/manifest
+```
+### Installing Both Ollama and Open WebUI Using Helm
+Package Helm file first
+```bash
+helm package ./kubernetes/helm/
+```
+For cpu-only pod
+```bash
+helm install ollama-webui ./ollama-webui-*.tgz
+```
+For gpu-enabled pod
+```bash
+helm install ollama-webui ./ollama-webui-*.tgz --set ollama.resources.limits.nvidia.com/gpu="1"
+```
+Check the `kubernetes/helm/values.yaml` file to know which parameters are available for customization

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2023 Timothy Jaeryang Baek
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

Makefile ADDED Viewed

	@@ -0,0 +1,33 @@

+ifneq ($(shell which docker-compose 2>/dev/null),)
+    DOCKER_COMPOSE := docker-compose
+else
+    DOCKER_COMPOSE := docker compose
+endif
+install:
+	$(DOCKER_COMPOSE) up -d
+remove:
+	@chmod +x confirm_remove.sh
+	@./confirm_remove.sh
+start:
+	$(DOCKER_COMPOSE) start
+startAndBuild:
+	$(DOCKER_COMPOSE) up -d --build
+stop:
+	$(DOCKER_COMPOSE) stop
+update:
+	# Calls the LLM update script
+	chmod +x update_ollama_models.sh
+	@./update_ollama_models.sh
+	@git pull
+	$(DOCKER_COMPOSE) down
+	# Make sure the ollama-webui container is stopped before rebuilding
+	@docker stop open-webui || true
+	$(DOCKER_COMPOSE) up --build -d
+	$(DOCKER_COMPOSE) start

README.md ADDED Viewed

	@@ -0,0 +1,221 @@

+---
+title: Open WebUI
+emoji: 🐳
+colorFrom: purple
+colorTo: gray
+sdk: docker
+app_port: 8080
+---
+# Open WebUI 👋
+![GitHub stars](https://img.shields.io/github/stars/open-webui/open-webui?style=social)
+![GitHub forks](https://img.shields.io/github/forks/open-webui/open-webui?style=social)
+![GitHub watchers](https://img.shields.io/github/watchers/open-webui/open-webui?style=social)
+![GitHub repo size](https://img.shields.io/github/repo-size/open-webui/open-webui)
+![GitHub language count](https://img.shields.io/github/languages/count/open-webui/open-webui)
+![GitHub top language](https://img.shields.io/github/languages/top/open-webui/open-webui)
+![GitHub last commit](https://img.shields.io/github/last-commit/open-webui/open-webui?color=red)
+![Hits](https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2Follama-webui%2Follama-wbui&count_bg=%2379C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=hits&edge_flat=false)
+[![Discord](https://img.shields.io/badge/Discord-Open_WebUI-blue?logo=discord&logoColor=white)](https://discord.gg/5rJgQTnV4s)
+[![](https://img.shields.io/static/v1?label=Sponsor&message=%E2%9D%A4&logo=GitHub&color=%23fe8e86)](https://github.com/sponsors/tjbck)
+Open WebUI is an [extensible](https://github.com/open-webui/pipelines), feature-rich, and user-friendly self-hosted WebUI designed to operate entirely offline. It supports various LLM runners, including Ollama and OpenAI-compatible APIs. For more information, be sure to check out our [Open WebUI Documentation](https://docs.openwebui.com/).
+![Open WebUI Demo](./demo.gif)
+## Key Features of Open WebUI ⭐
+- 🚀 **Effortless Setup**: Install seamlessly using Docker or Kubernetes (kubectl, kustomize or helm) for a hassle-free experience with support for both `:ollama` and `:cuda` tagged images.
+- 🤝 **Ollama/OpenAI API Integration**: Effortlessly integrate OpenAI-compatible APIs for versatile conversations alongside Ollama models. Customize the OpenAI API URL to link with **LMStudio, GroqCloud, Mistral, OpenRouter, and more**.
+- 🛡️ **Granular Permissions and User Groups**: By allowing administrators to create detailed user roles and permissions, we ensure a secure user environment. This granularity not only enhances security but also allows for customized user experiences, fostering a sense of ownership and responsibility amongst users.
+- 📱 **Responsive Design**: Enjoy a seamless experience across Desktop PC, Laptop, and Mobile devices.
+- 📱 **Progressive Web App (PWA) for Mobile**: Enjoy a native app-like experience on your mobile device with our PWA, providing offline access on localhost and a seamless user interface.
+- ✒️🔢 **Full Markdown and LaTeX Support**: Elevate your LLM experience with comprehensive Markdown and LaTeX capabilities for enriched interaction.
+- 🎤📹 **Hands-Free Voice/Video Call**: Experience seamless communication with integrated hands-free voice and video call features, allowing for a more dynamic and interactive chat environment.
+- 🛠️ **Model Builder**: Easily create Ollama models via the Web UI. Create and add custom characters/agents, customize chat elements, and import models effortlessly through [Open WebUI Community](https://openwebui.com/) integration.
+- 🐍 **Native Python Function Calling Tool**: Enhance your LLMs with built-in code editor support in the tools workspace. Bring Your Own Function (BYOF) by simply adding your pure Python functions, enabling seamless integration with LLMs.
+- 📚 **Local RAG Integration**: Dive into the future of chat interactions with groundbreaking Retrieval Augmented Generation (RAG) support. This feature seamlessly integrates document interactions into your chat experience. You can load documents directly into the chat or add files to your document library, effortlessly accessing them using the `#` command before a query.
+- 🔍 **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, `serper`, `Serply`, `DuckDuckGo`, `TavilySearch`, `SearchApi` and `Bing` and inject the results directly into your chat experience.
+- 🌐 **Web Browsing Capability**: Seamlessly integrate websites into your chat experience using the `#` command followed by a URL. This feature allows you to incorporate web content directly into your conversations, enhancing the richness and depth of your interactions.
+- 🎨 **Image Generation Integration**: Seamlessly incorporate image generation capabilities using options such as AUTOMATIC1111 API or ComfyUI (local), and OpenAI's DALL-E (external), enriching your chat experience with dynamic visual content.
+- ⚙️ **Many Models Conversations**: Effortlessly engage with various models simultaneously, harnessing their unique strengths for optimal responses. Enhance your experience by leveraging a diverse set of models in parallel.
+- 🔐 **Role-Based Access Control (RBAC)**: Ensure secure access with restricted permissions; only authorized individuals can access your Ollama, and exclusive model creation/pulling rights are reserved for administrators.
+- 🌐🌍 **Multilingual Support**: Experience Open WebUI in your preferred language with our internationalization (i18n) support. Join us in expanding our supported languages! We're actively seeking contributors!
+- 🧩 **Pipelines, Open WebUI Plugin Support**: Seamlessly integrate custom logic and Python libraries into Open WebUI using [Pipelines Plugin Framework](https://github.com/open-webui/pipelines). Launch your Pipelines instance, set the OpenAI URL to the Pipelines URL, and explore endless possibilities. [Examples](https://github.com/open-webui/pipelines/tree/main/examples) include **Function Calling**, User **Rate Limiting** to control access, **Usage Monitoring** with tools like Langfuse, **Live Translation with LibreTranslate** for multilingual support, **Toxic Message Filtering** and much more.
+- 🌟 **Continuous Updates**: We are committed to improving Open WebUI with regular updates, fixes, and new features.
+Want to learn more about Open WebUI's features? Check out our [Open WebUI documentation](https://docs.openwebui.com/features) for a comprehensive overview!
+## 🔗 Also Check Out Open WebUI Community!
+Don't forget to explore our sibling project, [Open WebUI Community](https://openwebui.com/), where you can discover, download, and explore customized Modelfiles. Open WebUI Community offers a wide range of exciting possibilities for enhancing your chat interactions with Open WebUI! 🚀
+## How to Install 🚀
+### Installation via Python pip 🐍
+Open WebUI can be installed using pip, the Python package installer. Before proceeding, ensure you're using **Python 3.11** to avoid compatibility issues.
+1. **Install Open WebUI**:
+   Open your terminal and run the following command to install Open WebUI:
+   ```bash
+   pip install open-webui
+   ```
+2. **Running Open WebUI**:
+   After installation, you can start Open WebUI by executing:
+   ```bash
+   open-webui serve
+   ```
+This will start the Open WebUI server, which you can access at [http://localhost:8080](http://localhost:8080)
+### Quick Start with Docker 🐳
+> [!NOTE]
+> Please note that for certain Docker environments, additional configurations might be needed. If you encounter any connection issues, our detailed guide on [Open WebUI Documentation](https://docs.openwebui.com/) is ready to assist you.
+> [!WARNING]
+> When using Docker to install Open WebUI, make sure to include the `-v open-webui:/app/backend/data` in your Docker command. This step is crucial as it ensures your database is properly mounted and prevents any loss of data.
+> [!TIP]
+> If you wish to utilize Open WebUI with Ollama included or CUDA acceleration, we recommend utilizing our official images tagged with either `:cuda` or `:ollama`. To enable CUDA, you must install the [Nvidia CUDA container toolkit](https://docs.nvidia.com/dgx/nvidia-container-runtime-upgrade/) on your Linux/WSL system.
+### Installation with Default Configuration
+- **If Ollama is on your computer**, use this command:
+  ```bash
+  docker run -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
+  ```
+- **If Ollama is on a Different Server**, use this command:
+  To connect to Ollama on another server, change the `OLLAMA_BASE_URL` to the server's URL:
+  ```bash
+  docker run -d -p 3000:8080 -e OLLAMA_BASE_URL=https://example.com -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
+  ```
+- **To run Open WebUI with Nvidia GPU support**, use this command:
+  ```bash
+  docker run -d -p 3000:8080 --gpus all --add-host=host.docker.internal:host-gateway -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:cuda
+  ```
+### Installation for OpenAI API Usage Only
+- **If you're only using OpenAI API**, use this command:
+  ```bash
+  docker run -d -p 3000:8080 -e OPENAI_API_KEY=your_secret_key -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
+  ```
+### Installing Open WebUI with Bundled Ollama Support
+This installation method uses a single container image that bundles Open WebUI with Ollama, allowing for a streamlined setup via a single command. Choose the appropriate command based on your hardware setup:
+- **With GPU Support**:
+  Utilize GPU resources by running the following command:
+  ```bash
+  docker run -d -p 3000:8080 --gpus=all -v ollama:/root/.ollama -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:ollama
+  ```
+- **For CPU Only**:
+  If you're not using a GPU, use this command instead:
+  ```bash
+  docker run -d -p 3000:8080 -v ollama:/root/.ollama -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:ollama
+  ```
+Both commands facilitate a built-in, hassle-free installation of both Open WebUI and Ollama, ensuring that you can get everything up and running swiftly.
+After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄
+### Other Installation Methods
+We offer various installation alternatives, including non-Docker native installation methods, Docker Compose, Kustomize, and Helm. Visit our [Open WebUI Documentation](https://docs.openwebui.com/getting-started/) or join our [Discord community](https://discord.gg/5rJgQTnV4s) for comprehensive guidance.
+### Troubleshooting
+Encountering connection issues? Our [Open WebUI Documentation](https://docs.openwebui.com/troubleshooting/) has got you covered. For further assistance and to join our vibrant community, visit the [Open WebUI Discord](https://discord.gg/5rJgQTnV4s).
+#### Open WebUI: Server Connection Error
+If you're experiencing connection issues, it’s often due to the WebUI docker container not being able to reach the Ollama server at 127.0.0.1:11434 (host.docker.internal:11434) inside the container . Use the `--network=host` flag in your docker command to resolve this. Note that the port changes from 3000 to 8080, resulting in the link: `http://localhost:8080`.
+**Example Docker Command**:
+```bash
+docker run -d --network=host -v open-webui:/app/backend/data -e OLLAMA_BASE_URL=http://127.0.0.1:11434 --name open-webui --restart always ghcr.io/open-webui/open-webui:main
+```
+### Keeping Your Docker Installation Up-to-Date
+In case you want to update your local Docker installation to the latest version, you can do it with [Watchtower](https://containrrr.dev/watchtower/):
+```bash
+docker run --rm --volume /var/run/docker.sock:/var/run/docker.sock containrrr/watchtower --run-once open-webui
+```
+In the last part of the command, replace `open-webui` with your container name if it is different.
+Check our Migration Guide available in our [Open WebUI Documentation](https://docs.openwebui.com/tutorials/migration/).
+### Using the Dev Branch 🌙
+> [!WARNING]
+> The `:dev` branch contains the latest unstable features and changes. Use it at your own risk as it may have bugs or incomplete features.
+If you want to try out the latest bleeding-edge features and are okay with occasional instability, you can use the `:dev` tag like this:
+```bash
+docker run -d -p 3000:8080 -v open-webui:/app/backend/data --name open-webui --add-host=host.docker.internal:host-gateway --restart always ghcr.io/open-webui/open-webui:dev
+```
+## What's Next? 🌟
+Discover upcoming features on our roadmap in the [Open WebUI Documentation](https://docs.openwebui.com/roadmap/).
+## License 📜
+This project is licensed under the [MIT License](LICENSE) - see the [LICENSE](LICENSE) file for details. 📄
+## Support 💬
+If you have any questions, suggestions, or need assistance, please open an issue or join our
+[Open WebUI Discord community](https://discord.gg/5rJgQTnV4s) to connect with us! 🤝
+## Star History
+<a href="https://star-history.com/#open-webui/open-webui&Date">
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=open-webui/open-webui&type=Date&theme=dark" />
+    <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=open-webui/open-webui&type=Date" />
+    <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=open-webui/open-webui&type=Date" />
+  </picture>
+</a>
+---
+Created by [Timothy Jaeryang Baek](https://github.com/tjbck) - Let's make Open WebUI even more amazing together! 💪

TROUBLESHOOTING.md ADDED Viewed

	@@ -0,0 +1,36 @@

+# Open WebUI Troubleshooting Guide
+## Understanding the Open WebUI Architecture
+The Open WebUI system is designed to streamline interactions between the client (your browser) and the Ollama API. At the heart of this design is a backend reverse proxy, enhancing security and resolving CORS issues.
+- **How it Works**: The Open WebUI is designed to interact with the Ollama API through a specific route. When a request is made from the WebUI to Ollama, it is not directly sent to the Ollama API. Initially, the request is sent to the Open WebUI backend via `/ollama` route. From there, the backend is responsible for forwarding the request to the Ollama API. This forwarding is accomplished by using the route specified in the `OLLAMA_BASE_URL` environment variable. Therefore, a request made to `/ollama` in the WebUI is effectively the same as making a request to `OLLAMA_BASE_URL` in the backend. For instance, a request to `/ollama/api/tags` in the WebUI is equivalent to `OLLAMA_BASE_URL/api/tags` in the backend.
+- **Security Benefits**: This design prevents direct exposure of the Ollama API to the frontend, safeguarding against potential CORS (Cross-Origin Resource Sharing) issues and unauthorized access. Requiring authentication to access the Ollama API further enhances this security layer.
+## Open WebUI: Server Connection Error
+If you're experiencing connection issues, it’s often due to the WebUI docker container not being able to reach the Ollama server at 127.0.0.1:11434 (host.docker.internal:11434) inside the container . Use the `--network=host` flag in your docker command to resolve this. Note that the port changes from 3000 to 8080, resulting in the link: `http://localhost:8080`.
+**Example Docker Command**:
+```bash
+docker run -d --network=host -v open-webui:/app/backend/data -e OLLAMA_BASE_URL=http://127.0.0.1:11434 --name open-webui --restart always ghcr.io/open-webui/open-webui:main
+```
+### Error on Slow Responses for Ollama
+Open WebUI has a default timeout of 5 minutes for Ollama to finish generating the response. If needed, this can be adjusted via the environment variable AIOHTTP_CLIENT_TIMEOUT, which sets the timeout in seconds.
+### General Connection Errors
+**Ensure Ollama Version is Up-to-Date**: Always start by checking that you have the latest version of Ollama. Visit [Ollama's official site](https://ollama.com/) for the latest updates.
+**Troubleshooting Steps**:
+1. **Verify Ollama URL Format**:
+   - When running the Web UI container, ensure the `OLLAMA_BASE_URL` is correctly set. (e.g., `http://192.168.1.1:11434` for different host setups).
+   - In the Open WebUI, navigate to "Settings" > "General".
+   - Confirm that the Ollama Server URL is correctly set to `[OLLAMA URL]` (e.g., `http://localhost:11434`).
+By following these enhanced troubleshooting steps, connection issues should be effectively resolved. For further assistance or queries, feel free to reach out to us on our community Discord.

backend/.dockerignore ADDED Viewed

	@@ -0,0 +1,14 @@

+__pycache__
+.env
+_old
+uploads
+.ipynb_checkpoints
+*.db
+_test
+!/data
+/data/*
+!/data/litellm
+/data/litellm/*
+!data/litellm/config.yaml
+!data/config.json

backend/.gitignore ADDED Viewed

	@@ -0,0 +1,12 @@

+__pycache__
+.env
+_old
+uploads
+.ipynb_checkpoints
+*.db
+_test
+Pipfile
+!/data
+/data/*
+/open_webui/data/*
+.webui_secret_key

backend/dev.sh ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ PORT="${PORT:-8080}"
2	+ uvicorn open_webui.main:app --port $PORT --host 0.0.0.0 --forwarded-allow-ips '*' --reload

backend/open_webui/__init__.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import base64
+import os
+import random
+from pathlib import Path
+import typer
+import uvicorn
+app = typer.Typer()
+KEY_FILE = Path.cwd() / ".webui_secret_key"
+@app.command()
+def serve(
+    host: str = "0.0.0.0",
+    port: int = 8080,
+):
+    os.environ["FROM_INIT_PY"] = "true"
+    if os.getenv("WEBUI_SECRET_KEY") is None:
+        typer.echo(
+            "Loading WEBUI_SECRET_KEY from file, not provided as an environment variable."
+        )
+        if not KEY_FILE.exists():
+            typer.echo(f"Generating a new secret key and saving it to {KEY_FILE}")
+            KEY_FILE.write_bytes(base64.b64encode(random.randbytes(12)))
+        typer.echo(f"Loading WEBUI_SECRET_KEY from {KEY_FILE}")
+        os.environ["WEBUI_SECRET_KEY"] = KEY_FILE.read_text()
+    if os.getenv("USE_CUDA_DOCKER", "false") == "true":
+        typer.echo(
+            "CUDA is enabled, appending LD_LIBRARY_PATH to include torch/cudnn & cublas libraries."
+        )
+        LD_LIBRARY_PATH = os.getenv("LD_LIBRARY_PATH", "").split(":")
+        os.environ["LD_LIBRARY_PATH"] = ":".join(
+            LD_LIBRARY_PATH
+            + [
+                "/usr/local/lib/python3.11/site-packages/torch/lib",
+                "/usr/local/lib/python3.11/site-packages/nvidia/cudnn/lib",
+            ]
+        )
+        try:
+            import torch
+            assert torch.cuda.is_available(), "CUDA not available"
+            typer.echo("CUDA seems to be working")
+        except Exception as e:
+            typer.echo(
+                "Error when testing CUDA but USE_CUDA_DOCKER is true. "
+                "Resetting USE_CUDA_DOCKER to false and removing "
+                f"LD_LIBRARY_PATH modifications: {e}"
+            )
+            os.environ["USE_CUDA_DOCKER"] = "false"
+            os.environ["LD_LIBRARY_PATH"] = ":".join(LD_LIBRARY_PATH)
+    import open_webui.main  # we need set environment variables before importing main
+    uvicorn.run(open_webui.main.app, host=host, port=port, forwarded_allow_ips="*")
+@app.command()
+def dev(
+    host: str = "0.0.0.0",
+    port: int = 8080,
+    reload: bool = True,
+):
+    uvicorn.run(
+        "open_webui.main:app",
+        host=host,
+        port=port,
+        reload=reload,
+        forwarded_allow_ips="*",
+    )
+if __name__ == "__main__":
+    app()

backend/open_webui/alembic.ini ADDED Viewed

	@@ -0,0 +1,114 @@

+# A generic, single database configuration.
+[alembic]
+# path to migration scripts
+script_location = migrations
+# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
+# Uncomment the line below if you want the files to be prepended with date and time
+# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+# sys.path path, will be prepended to sys.path if present.
+# defaults to the current working directory.
+prepend_sys_path = .
+# timezone to use when rendering the date within the migration file
+# as well as the filename.
+# If specified, requires the python>=3.9 or backports.zoneinfo library.
+# Any required deps can installed by adding `alembic[tz]` to the pip requirements
+# string value is passed to ZoneInfo()
+# leave blank for localtime
+# timezone =
+# max length of characters to apply to the
+# "slug" field
+# truncate_slug_length = 40
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+# version location specification; This defaults
+# to migrations/versions.  When using multiple version
+# directories, initial revisions must be specified with --version-path.
+# The path separator used here should be the separator specified by "version_path_separator" below.
+# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions
+# version path separator; As mentioned above, this is the character used to split
+# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
+# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
+# Valid values for version_path_separator are:
+#
+# version_path_separator = :
+# version_path_separator = ;
+# version_path_separator = space
+version_path_separator = os  # Use os.pathsep. Default configuration used for new projects.
+# set to 'true' to search source files recursively
+# in each "version_locations" directory
+# new in Alembic version 1.10
+# recursive_version_locations = false
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+# sqlalchemy.url = REPLACE_WITH_DATABASE_URL
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts.  See the documentation for further
+# detail and examples
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
+# hooks = ruff
+# ruff.type = exec
+# ruff.executable = %(here)s/.venv/bin/ruff
+# ruff.options = --fix REVISION_SCRIPT_FILENAME
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+[handlers]
+keys = console
+[formatters]
+keys = generic
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S

backend/open_webui/apps/audio/main.py ADDED Viewed

	@@ -0,0 +1,703 @@

+import hashlib
+import json
+import logging
+import os
+import uuid
+from functools import lru_cache
+from pathlib import Path
+from pydub import AudioSegment
+from pydub.silence import split_on_silence
+import aiohttp
+import aiofiles
+import requests
+from open_webui.config import (
+    AUDIO_STT_ENGINE,
+    AUDIO_STT_MODEL,
+    AUDIO_STT_OPENAI_API_BASE_URL,
+    AUDIO_STT_OPENAI_API_KEY,
+    AUDIO_TTS_API_KEY,
+    AUDIO_TTS_ENGINE,
+    AUDIO_TTS_MODEL,
+    AUDIO_TTS_OPENAI_API_BASE_URL,
+    AUDIO_TTS_OPENAI_API_KEY,
+    AUDIO_TTS_SPLIT_ON,
+    AUDIO_TTS_VOICE,
+    AUDIO_TTS_AZURE_SPEECH_REGION,
+    AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT,
+    CACHE_DIR,
+    CORS_ALLOW_ORIGIN,
+    WHISPER_MODEL,
+    WHISPER_MODEL_AUTO_UPDATE,
+    WHISPER_MODEL_DIR,
+    AppConfig,
+)
+from open_webui.constants import ERROR_MESSAGES
+from open_webui.env import (
+    ENV,
+    SRC_LOG_LEVELS,
+    DEVICE_TYPE,
+    ENABLE_FORWARD_USER_INFO_HEADERS,
+)
+from fastapi import Depends, FastAPI, File, HTTPException, Request, UploadFile, status
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
+from pydantic import BaseModel
+from open_webui.utils.utils import get_admin_user, get_verified_user
+# Constants
+MAX_FILE_SIZE_MB = 25
+MAX_FILE_SIZE = MAX_FILE_SIZE_MB * 1024 * 1024  # Convert MB to bytes
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["AUDIO"])
+app = FastAPI(
+    docs_url="/docs" if ENV == "dev" else None,
+    openapi_url="/openapi.json" if ENV == "dev" else None,
+    redoc_url=None,
+)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=CORS_ALLOW_ORIGIN,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+app.state.config = AppConfig()
+app.state.config.STT_OPENAI_API_BASE_URL = AUDIO_STT_OPENAI_API_BASE_URL
+app.state.config.STT_OPENAI_API_KEY = AUDIO_STT_OPENAI_API_KEY
+app.state.config.STT_ENGINE = AUDIO_STT_ENGINE
+app.state.config.STT_MODEL = AUDIO_STT_MODEL
+app.state.config.WHISPER_MODEL = WHISPER_MODEL
+app.state.faster_whisper_model = None
+app.state.config.TTS_OPENAI_API_BASE_URL = AUDIO_TTS_OPENAI_API_BASE_URL
+app.state.config.TTS_OPENAI_API_KEY = AUDIO_TTS_OPENAI_API_KEY
+app.state.config.TTS_ENGINE = AUDIO_TTS_ENGINE
+app.state.config.TTS_MODEL = AUDIO_TTS_MODEL
+app.state.config.TTS_VOICE = AUDIO_TTS_VOICE
+app.state.config.TTS_API_KEY = AUDIO_TTS_API_KEY
+app.state.config.TTS_SPLIT_ON = AUDIO_TTS_SPLIT_ON
+app.state.speech_synthesiser = None
+app.state.speech_speaker_embeddings_dataset = None
+app.state.config.TTS_AZURE_SPEECH_REGION = AUDIO_TTS_AZURE_SPEECH_REGION
+app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT = AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT
+# setting device type for whisper model
+whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
+log.info(f"whisper_device_type: {whisper_device_type}")
+SPEECH_CACHE_DIR = Path(CACHE_DIR).joinpath("./audio/speech/")
+SPEECH_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+def set_faster_whisper_model(model: str, auto_update: bool = False):
+    if model and app.state.config.STT_ENGINE == "":
+        from faster_whisper import WhisperModel
+        faster_whisper_kwargs = {
+            "model_size_or_path": model,
+            "device": whisper_device_type,
+            "compute_type": "int8",
+            "download_root": WHISPER_MODEL_DIR,
+            "local_files_only": not auto_update,
+        }
+        try:
+            app.state.faster_whisper_model = WhisperModel(**faster_whisper_kwargs)
+        except Exception:
+            log.warning(
+                "WhisperModel initialization failed, attempting download with local_files_only=False"
+            )
+            faster_whisper_kwargs["local_files_only"] = False
+            app.state.faster_whisper_model = WhisperModel(**faster_whisper_kwargs)
+    else:
+        app.state.faster_whisper_model = None
+class TTSConfigForm(BaseModel):
+    OPENAI_API_BASE_URL: str
+    OPENAI_API_KEY: str
+    API_KEY: str
+    ENGINE: str
+    MODEL: str
+    VOICE: str
+    SPLIT_ON: str
+    AZURE_SPEECH_REGION: str
+    AZURE_SPEECH_OUTPUT_FORMAT: str
+class STTConfigForm(BaseModel):
+    OPENAI_API_BASE_URL: str
+    OPENAI_API_KEY: str
+    ENGINE: str
+    MODEL: str
+    WHISPER_MODEL: str
+class AudioConfigUpdateForm(BaseModel):
+    tts: TTSConfigForm
+    stt: STTConfigForm
+from pydub import AudioSegment
+from pydub.utils import mediainfo
+def is_mp4_audio(file_path):
+    """Check if the given file is an MP4 audio file."""
+    if not os.path.isfile(file_path):
+        print(f"File not found: {file_path}")
+        return False
+    info = mediainfo(file_path)
+    if (
+        info.get("codec_name") == "aac"
+        and info.get("codec_type") == "audio"
+        and info.get("codec_tag_string") == "mp4a"
+    ):
+        return True
+    return False
+def convert_mp4_to_wav(file_path, output_path):
+    """Convert MP4 audio file to WAV format."""
+    audio = AudioSegment.from_file(file_path, format="mp4")
+    audio.export(output_path, format="wav")
+    print(f"Converted {file_path} to {output_path}")
+@app.get("/config")
+async def get_audio_config(user=Depends(get_admin_user)):
+    return {
+        "tts": {
+            "OPENAI_API_BASE_URL": app.state.config.TTS_OPENAI_API_BASE_URL,
+            "OPENAI_API_KEY": app.state.config.TTS_OPENAI_API_KEY,
+            "API_KEY": app.state.config.TTS_API_KEY,
+            "ENGINE": app.state.config.TTS_ENGINE,
+            "MODEL": app.state.config.TTS_MODEL,
+            "VOICE": app.state.config.TTS_VOICE,
+            "SPLIT_ON": app.state.config.TTS_SPLIT_ON,
+            "AZURE_SPEECH_REGION": app.state.config.TTS_AZURE_SPEECH_REGION,
+            "AZURE_SPEECH_OUTPUT_FORMAT": app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT,
+        },
+        "stt": {
+            "OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,
+            "OPENAI_API_KEY": app.state.config.STT_OPENAI_API_KEY,
+            "ENGINE": app.state.config.STT_ENGINE,
+            "MODEL": app.state.config.STT_MODEL,
+            "WHISPER_MODEL": app.state.config.WHISPER_MODEL,
+        },
+    }
+@app.post("/config/update")
+async def update_audio_config(
+    form_data: AudioConfigUpdateForm, user=Depends(get_admin_user)
+):
+    app.state.config.TTS_OPENAI_API_BASE_URL = form_data.tts.OPENAI_API_BASE_URL
+    app.state.config.TTS_OPENAI_API_KEY = form_data.tts.OPENAI_API_KEY
+    app.state.config.TTS_API_KEY = form_data.tts.API_KEY
+    app.state.config.TTS_ENGINE = form_data.tts.ENGINE
+    app.state.config.TTS_MODEL = form_data.tts.MODEL
+    app.state.config.TTS_VOICE = form_data.tts.VOICE
+    app.state.config.TTS_SPLIT_ON = form_data.tts.SPLIT_ON
+    app.state.config.TTS_AZURE_SPEECH_REGION = form_data.tts.AZURE_SPEECH_REGION
+    app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT = (
+        form_data.tts.AZURE_SPEECH_OUTPUT_FORMAT
+    )
+    app.state.config.STT_OPENAI_API_BASE_URL = form_data.stt.OPENAI_API_BASE_URL
+    app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY
+    app.state.config.STT_ENGINE = form_data.stt.ENGINE
+    app.state.config.STT_MODEL = form_data.stt.MODEL
+    app.state.config.WHISPER_MODEL = form_data.stt.WHISPER_MODEL
+    set_faster_whisper_model(form_data.stt.WHISPER_MODEL, WHISPER_MODEL_AUTO_UPDATE)
+    return {
+        "tts": {
+            "OPENAI_API_BASE_URL": app.state.config.TTS_OPENAI_API_BASE_URL,
+            "OPENAI_API_KEY": app.state.config.TTS_OPENAI_API_KEY,
+            "API_KEY": app.state.config.TTS_API_KEY,
+            "ENGINE": app.state.config.TTS_ENGINE,
+            "MODEL": app.state.config.TTS_MODEL,
+            "VOICE": app.state.config.TTS_VOICE,
+            "SPLIT_ON": app.state.config.TTS_SPLIT_ON,
+            "AZURE_SPEECH_REGION": app.state.config.TTS_AZURE_SPEECH_REGION,
+            "AZURE_SPEECH_OUTPUT_FORMAT": app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT,
+        },
+        "stt": {
+            "OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,
+            "OPENAI_API_KEY": app.state.config.STT_OPENAI_API_KEY,
+            "ENGINE": app.state.config.STT_ENGINE,
+            "MODEL": app.state.config.STT_MODEL,
+            "WHISPER_MODEL": app.state.config.WHISPER_MODEL,
+        },
+    }
+def load_speech_pipeline():
+    from transformers import pipeline
+    from datasets import load_dataset
+    if app.state.speech_synthesiser is None:
+        app.state.speech_synthesiser = pipeline(
+            "text-to-speech", "microsoft/speecht5_tts"
+        )
+    if app.state.speech_speaker_embeddings_dataset is None:
+        app.state.speech_speaker_embeddings_dataset = load_dataset(
+            "Matthijs/cmu-arctic-xvectors", split="validation"
+        )
+@app.post("/speech")
+async def speech(request: Request, user=Depends(get_verified_user)):
+    body = await request.body()
+    name = hashlib.sha256(body).hexdigest()
+    file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3")
+    file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json")
+    # Check if the file already exists in the cache
+    if file_path.is_file():
+        return FileResponse(file_path)
+    if app.state.config.TTS_ENGINE == "openai":
+        headers = {}
+        headers["Authorization"] = f"Bearer {app.state.config.TTS_OPENAI_API_KEY}"
+        headers["Content-Type"] = "application/json"
+        if ENABLE_FORWARD_USER_INFO_HEADERS:
+            headers["X-OpenWebUI-User-Name"] = user.name
+            headers["X-OpenWebUI-User-Id"] = user.id
+            headers["X-OpenWebUI-User-Email"] = user.email
+            headers["X-OpenWebUI-User-Role"] = user.role
+        try:
+            body = body.decode("utf-8")
+            body = json.loads(body)
+            body["model"] = app.state.config.TTS_MODEL
+            body = json.dumps(body).encode("utf-8")
+        except Exception:
+            pass
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    url=f"{app.state.config.TTS_OPENAI_API_BASE_URL}/audio/speech",
+                    data=body,
+                    headers=headers,
+                ) as r:
+                    r.raise_for_status()
+                    async with aiofiles.open(file_path, "wb") as f:
+                        await f.write(await r.read())
+                    async with aiofiles.open(file_body_path, "w") as f:
+                        await f.write(json.dumps(json.loads(body.decode("utf-8"))))
+            return FileResponse(file_path)
+        except Exception as e:
+            log.exception(e)
+            error_detail = "Open WebUI: Server Connection Error"
+            try:
+                if r.status != 200:
+                    res = await r.json()
+                    if "error" in res:
+                        error_detail = f"External: {res['error']['message']}"
+            except Exception:
+                error_detail = f"External: {e}"
+            raise HTTPException(
+                status_code=getattr(r, "status", 500),
+                detail=error_detail,
+            )
+    elif app.state.config.TTS_ENGINE == "elevenlabs":
+        try:
+            payload = json.loads(body.decode("utf-8"))
+        except Exception as e:
+            log.exception(e)
+            raise HTTPException(status_code=400, detail="Invalid JSON payload")
+        voice_id = payload.get("voice", "")
+        if voice_id not in get_available_voices():
+            raise HTTPException(
+                status_code=400,
+                detail="Invalid voice id",
+            )
+        url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
+        headers = {
+            "Accept": "audio/mpeg",
+            "Content-Type": "application/json",
+            "xi-api-key": app.state.config.TTS_API_KEY,
+        }
+        data = {
+            "text": payload["input"],
+            "model_id": app.state.config.TTS_MODEL,
+            "voice_settings": {"stability": 0.5, "similarity_boost": 0.5},
+        }
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(url, json=data, headers=headers) as r:
+                    r.raise_for_status()
+                    async with aiofiles.open(file_path, "wb") as f:
+                        await f.write(await r.read())
+                    async with aiofiles.open(file_body_path, "w") as f:
+                        await f.write(json.dumps(json.loads(body.decode("utf-8"))))
+            return FileResponse(file_path)
+        except Exception as e:
+            log.exception(e)
+            error_detail = "Open WebUI: Server Connection Error"
+            try:
+                if r.status != 200:
+                    res = await r.json()
+                    if "error" in res:
+                        error_detail = f"External: {res['error']['message']}"
+            except Exception:
+                error_detail = f"External: {e}"
+            raise HTTPException(
+                status_code=getattr(r, "status", 500),
+                detail=error_detail,
+            )
+    elif app.state.config.TTS_ENGINE == "azure":
+        try:
+            payload = json.loads(body.decode("utf-8"))
+        except Exception as e:
+            log.exception(e)
+            raise HTTPException(status_code=400, detail="Invalid JSON payload")
+        region = app.state.config.TTS_AZURE_SPEECH_REGION
+        language = app.state.config.TTS_VOICE
+        locale = "-".join(app.state.config.TTS_VOICE.split("-")[:1])
+        output_format = app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT
+        url = f"https://{region}.tts.speech.microsoft.com/cognitiveservices/v1"
+        headers = {
+            "Ocp-Apim-Subscription-Key": app.state.config.TTS_API_KEY,
+            "Content-Type": "application/ssml+xml",
+            "X-Microsoft-OutputFormat": output_format,
+        }
+        data = f"""<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{locale}">
+                <voice name="{language}">{payload["input"]}</voice>
+            </speak>"""
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(url, headers=headers, data=data) as response:
+                    if response.status == 200:
+                        async with aiofiles.open(file_path, "wb") as f:
+                            await f.write(await response.read())
+                        return FileResponse(file_path)
+                    else:
+                        error_msg = f"Error synthesizing speech - {response.reason}"
+                        log.error(error_msg)
+                        raise HTTPException(status_code=500, detail=error_msg)
+        except Exception as e:
+            log.exception(e)
+            raise HTTPException(status_code=500, detail=str(e))
+    elif app.state.config.TTS_ENGINE == "transformers":
+        payload = None
+        try:
+            payload = json.loads(body.decode("utf-8"))
+        except Exception as e:
+            log.exception(e)
+            raise HTTPException(status_code=400, detail="Invalid JSON payload")
+        import torch
+        import soundfile as sf
+        load_speech_pipeline()
+        embeddings_dataset = app.state.speech_speaker_embeddings_dataset
+        speaker_index = 6799
+        try:
+            speaker_index = embeddings_dataset["filename"].index(
+                app.state.config.TTS_MODEL
+            )
+        except Exception:
+            pass
+        speaker_embedding = torch.tensor(
+            embeddings_dataset[speaker_index]["xvector"]
+        ).unsqueeze(0)
+        speech = app.state.speech_synthesiser(
+            payload["input"],
+            forward_params={"speaker_embeddings": speaker_embedding},
+        )
+        sf.write(file_path, speech["audio"], samplerate=speech["sampling_rate"])
+        with open(file_body_path, "w") as f:
+            json.dump(json.loads(body.decode("utf-8")), f)
+        return FileResponse(file_path)
+def transcribe(file_path):
+    print("transcribe", file_path)
+    filename = os.path.basename(file_path)
+    file_dir = os.path.dirname(file_path)
+    id = filename.split(".")[0]
+    if app.state.config.STT_ENGINE == "":
+        if app.state.faster_whisper_model is None:
+            set_faster_whisper_model(app.state.config.WHISPER_MODEL)
+        model = app.state.faster_whisper_model
+        segments, info = model.transcribe(file_path, beam_size=5)
+        log.info(
+            "Detected language '%s' with probability %f"
+            % (info.language, info.language_probability)
+        )
+        transcript = "".join([segment.text for segment in list(segments)])
+        data = {"text": transcript.strip()}
+        # save the transcript to a json file
+        transcript_file = f"{file_dir}/{id}.json"
+        with open(transcript_file, "w") as f:
+            json.dump(data, f)
+        log.debug(data)
+        return data
+    elif app.state.config.STT_ENGINE == "openai":
+        if is_mp4_audio(file_path):
+            print("is_mp4_audio")
+            os.rename(file_path, file_path.replace(".wav", ".mp4"))
+            # Convert MP4 audio file to WAV format
+            convert_mp4_to_wav(file_path.replace(".wav", ".mp4"), file_path)
+        headers = {"Authorization": f"Bearer {app.state.config.STT_OPENAI_API_KEY}"}
+        files = {"file": (filename, open(file_path, "rb"))}
+        data = {"model": app.state.config.STT_MODEL}
+        log.debug(files, data)
+        r = None
+        try:
+            r = requests.post(
+                url=f"{app.state.config.STT_OPENAI_API_BASE_URL}/audio/transcriptions",
+                headers=headers,
+                files=files,
+                data=data,
+            )
+            r.raise_for_status()
+            data = r.json()
+            # save the transcript to a json file
+            transcript_file = f"{file_dir}/{id}.json"
+            with open(transcript_file, "w") as f:
+                json.dump(data, f)
+            print(data)
+            return data
+        except Exception as e:
+            log.exception(e)
+            error_detail = "Open WebUI: Server Connection Error"
+            if r is not None:
+                try:
+                    res = r.json()
+                    if "error" in res:
+                        error_detail = f"External: {res['error']['message']}"
+                except Exception:
+                    error_detail = f"External: {e}"
+            raise Exception(error_detail)
+@app.post("/transcriptions")
+def transcription(
+    file: UploadFile = File(...),
+    user=Depends(get_verified_user),
+):
+    log.info(f"file.content_type: {file.content_type}")
+    if file.content_type not in ["audio/mpeg", "audio/wav", "audio/ogg", "audio/x-m4a"]:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
+        )
+    try:
+        ext = file.filename.split(".")[-1]
+        id = uuid.uuid4()
+        filename = f"{id}.{ext}"
+        contents = file.file.read()
+        file_dir = f"{CACHE_DIR}/audio/transcriptions"
+        os.makedirs(file_dir, exist_ok=True)
+        file_path = f"{file_dir}/{filename}"
+        with open(file_path, "wb") as f:
+            f.write(contents)
+        try:
+            if os.path.getsize(file_path) > MAX_FILE_SIZE:  # file is bigger than 25MB
+                log.debug(f"File size is larger than {MAX_FILE_SIZE_MB}MB")
+                audio = AudioSegment.from_file(file_path)
+                audio = audio.set_frame_rate(16000).set_channels(1)  # Compress audio
+                compressed_path = f"{file_dir}/{id}_compressed.opus"
+                audio.export(compressed_path, format="opus", bitrate="32k")
+                log.debug(f"Compressed audio to {compressed_path}")
+                file_path = compressed_path
+                if (
+                    os.path.getsize(file_path) > MAX_FILE_SIZE
+                ):  # Still larger than 25MB after compression
+                    log.debug(
+                        f"Compressed file size is still larger than {MAX_FILE_SIZE_MB}MB: {os.path.getsize(file_path)}"
+                    )
+                    raise HTTPException(
+                        status_code=status.HTTP_400_BAD_REQUEST,
+                        detail=ERROR_MESSAGES.FILE_TOO_LARGE(
+                            size=f"{MAX_FILE_SIZE_MB}MB"
+                        ),
+                    )
+                data = transcribe(file_path)
+            else:
+                data = transcribe(file_path)
+            file_path = file_path.split("/")[-1]
+            return {**data, "filename": file_path}
+        except Exception as e:
+            log.exception(e)
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail=ERROR_MESSAGES.DEFAULT(e),
+            )
+    except Exception as e:
+        log.exception(e)
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=ERROR_MESSAGES.DEFAULT(e),
+        )
+def get_available_models() -> list[dict]:
+    if app.state.config.TTS_ENGINE == "openai":
+        return [{"id": "tts-1"}, {"id": "tts-1-hd"}]
+    elif app.state.config.TTS_ENGINE == "elevenlabs":
+        headers = {
+            "xi-api-key": app.state.config.TTS_API_KEY,
+            "Content-Type": "application/json",
+        }
+        try:
+            response = requests.get(
+                "https://api.elevenlabs.io/v1/models", headers=headers, timeout=5
+            )
+            response.raise_for_status()
+            models = response.json()
+            return [
+                {"name": model["name"], "id": model["model_id"]} for model in models
+            ]
+        except requests.RequestException as e:
+            log.error(f"Error fetching voices: {str(e)}")
+    return []
+@app.get("/models")
+async def get_models(user=Depends(get_verified_user)):
+    return {"models": get_available_models()}
+def get_available_voices() -> dict:
+    """Returns {voice_id: voice_name} dict"""
+    ret = {}
+    if app.state.config.TTS_ENGINE == "openai":
+        ret = {
+            "alloy": "alloy",
+            "echo": "echo",
+            "fable": "fable",
+            "onyx": "onyx",
+            "nova": "nova",
+            "shimmer": "shimmer",
+        }
+    elif app.state.config.TTS_ENGINE == "elevenlabs":
+        try:
+            ret = get_elevenlabs_voices()
+        except Exception:
+            # Avoided @lru_cache with exception
+            pass
+    elif app.state.config.TTS_ENGINE == "azure":
+        try:
+            region = app.state.config.TTS_AZURE_SPEECH_REGION
+            url = f"https://{region}.tts.speech.microsoft.com/cognitiveservices/voices/list"
+            headers = {"Ocp-Apim-Subscription-Key": app.state.config.TTS_API_KEY}
+            response = requests.get(url, headers=headers)
+            response.raise_for_status()
+            voices = response.json()
+            for voice in voices:
+                ret[voice["ShortName"]] = (
+                    f"{voice['DisplayName']} ({voice['ShortName']})"
+                )
+        except requests.RequestException as e:
+            log.error(f"Error fetching voices: {str(e)}")
+    return ret
+@lru_cache
+def get_elevenlabs_voices() -> dict:
+    """
+    Note, set the following in your .env file to use Elevenlabs:
+    AUDIO_TTS_ENGINE=elevenlabs
+    AUDIO_TTS_API_KEY=sk_...  # Your Elevenlabs API key
+    AUDIO_TTS_VOICE=EXAVITQu4vr4xnSDxMaL  # From https://api.elevenlabs.io/v1/voices
+    AUDIO_TTS_MODEL=eleven_multilingual_v2
+    """
+    headers = {
+        "xi-api-key": app.state.config.TTS_API_KEY,
+        "Content-Type": "application/json",
+    }
+    try:
+        # TODO: Add retries
+        response = requests.get("https://api.elevenlabs.io/v1/voices", headers=headers)
+        response.raise_for_status()
+        voices_data = response.json()
+        voices = {}
+        for voice in voices_data.get("voices", []):
+            voices[voice["voice_id"]] = voice["name"]
+    except requests.RequestException as e:
+        # Avoid @lru_cache with exception
+        log.error(f"Error fetching voices: {str(e)}")
+        raise RuntimeError(f"Error fetching voices: {str(e)}")
+    return voices
+@app.get("/voices")
+async def get_voices(user=Depends(get_verified_user)):
+    return {"voices": [{"id": k, "name": v} for k, v in get_available_voices().items()]}

backend/open_webui/apps/images/main.py ADDED Viewed

	@@ -0,0 +1,609 @@

+import asyncio
+import base64
+import json
+import logging
+import mimetypes
+import re
+import uuid
+from pathlib import Path
+from typing import Optional
+import requests
+from open_webui.apps.images.utils.comfyui import (
+    ComfyUIGenerateImageForm,
+    ComfyUIWorkflow,
+    comfyui_generate_image,
+)
+from open_webui.config import (
+    AUTOMATIC1111_API_AUTH,
+    AUTOMATIC1111_BASE_URL,
+    AUTOMATIC1111_CFG_SCALE,
+    AUTOMATIC1111_SAMPLER,
+    AUTOMATIC1111_SCHEDULER,
+    CACHE_DIR,
+    COMFYUI_BASE_URL,
+    COMFYUI_WORKFLOW,
+    COMFYUI_WORKFLOW_NODES,
+    CORS_ALLOW_ORIGIN,
+    ENABLE_IMAGE_GENERATION,
+    IMAGE_GENERATION_ENGINE,
+    IMAGE_GENERATION_MODEL,
+    IMAGE_SIZE,
+    IMAGE_STEPS,
+    IMAGES_OPENAI_API_BASE_URL,
+    IMAGES_OPENAI_API_KEY,
+    AppConfig,
+)
+from open_webui.constants import ERROR_MESSAGES
+from open_webui.env import ENV, SRC_LOG_LEVELS, ENABLE_FORWARD_USER_INFO_HEADERS
+from fastapi import Depends, FastAPI, HTTPException, Request
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from open_webui.utils.utils import get_admin_user, get_verified_user
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["IMAGES"])
+IMAGE_CACHE_DIR = Path(CACHE_DIR).joinpath("./image/generations/")
+IMAGE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+app = FastAPI(
+    docs_url="/docs" if ENV == "dev" else None,
+    openapi_url="/openapi.json" if ENV == "dev" else None,
+    redoc_url=None,
+)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=CORS_ALLOW_ORIGIN,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+app.state.config = AppConfig()
+app.state.config.ENGINE = IMAGE_GENERATION_ENGINE
+app.state.config.ENABLED = ENABLE_IMAGE_GENERATION
+app.state.config.OPENAI_API_BASE_URL = IMAGES_OPENAI_API_BASE_URL
+app.state.config.OPENAI_API_KEY = IMAGES_OPENAI_API_KEY
+app.state.config.MODEL = IMAGE_GENERATION_MODEL
+app.state.config.AUTOMATIC1111_BASE_URL = AUTOMATIC1111_BASE_URL
+app.state.config.AUTOMATIC1111_API_AUTH = AUTOMATIC1111_API_AUTH
+app.state.config.AUTOMATIC1111_CFG_SCALE = AUTOMATIC1111_CFG_SCALE
+app.state.config.AUTOMATIC1111_SAMPLER = AUTOMATIC1111_SAMPLER
+app.state.config.AUTOMATIC1111_SCHEDULER = AUTOMATIC1111_SCHEDULER
+app.state.config.COMFYUI_BASE_URL = COMFYUI_BASE_URL
+app.state.config.COMFYUI_WORKFLOW = COMFYUI_WORKFLOW
+app.state.config.COMFYUI_WORKFLOW_NODES = COMFYUI_WORKFLOW_NODES
+app.state.config.IMAGE_SIZE = IMAGE_SIZE
+app.state.config.IMAGE_STEPS = IMAGE_STEPS
+@app.get("/config")
+async def get_config(request: Request, user=Depends(get_admin_user)):
+    return {
+        "enabled": app.state.config.ENABLED,
+        "engine": app.state.config.ENGINE,
+        "openai": {
+            "OPENAI_API_BASE_URL": app.state.config.OPENAI_API_BASE_URL,
+            "OPENAI_API_KEY": app.state.config.OPENAI_API_KEY,
+        },
+        "automatic1111": {
+            "AUTOMATIC1111_BASE_URL": app.state.config.AUTOMATIC1111_BASE_URL,
+            "AUTOMATIC1111_API_AUTH": app.state.config.AUTOMATIC1111_API_AUTH,
+            "AUTOMATIC1111_CFG_SCALE": app.state.config.AUTOMATIC1111_CFG_SCALE,
+            "AUTOMATIC1111_SAMPLER": app.state.config.AUTOMATIC1111_SAMPLER,
+            "AUTOMATIC1111_SCHEDULER": app.state.config.AUTOMATIC1111_SCHEDULER,
+        },
+        "comfyui": {
+            "COMFYUI_BASE_URL": app.state.config.COMFYUI_BASE_URL,
+            "COMFYUI_WORKFLOW": app.state.config.COMFYUI_WORKFLOW,
+            "COMFYUI_WORKFLOW_NODES": app.state.config.COMFYUI_WORKFLOW_NODES,
+        },
+    }
+class OpenAIConfigForm(BaseModel):
+    OPENAI_API_BASE_URL: str
+    OPENAI_API_KEY: str
+class Automatic1111ConfigForm(BaseModel):
+    AUTOMATIC1111_BASE_URL: str
+    AUTOMATIC1111_API_AUTH: str
+    AUTOMATIC1111_CFG_SCALE: Optional[str]
+    AUTOMATIC1111_SAMPLER: Optional[str]
+    AUTOMATIC1111_SCHEDULER: Optional[str]
+class ComfyUIConfigForm(BaseModel):
+    COMFYUI_BASE_URL: str
+    COMFYUI_WORKFLOW: str
+    COMFYUI_WORKFLOW_NODES: list[dict]
+class ConfigForm(BaseModel):
+    enabled: bool
+    engine: str
+    openai: OpenAIConfigForm
+    automatic1111: Automatic1111ConfigForm
+    comfyui: ComfyUIConfigForm
+@app.post("/config/update")
+async def update_config(form_data: ConfigForm, user=Depends(get_admin_user)):
+    app.state.config.ENGINE = form_data.engine
+    app.state.config.ENABLED = form_data.enabled
+    app.state.config.OPENAI_API_BASE_URL = form_data.openai.OPENAI_API_BASE_URL
+    app.state.config.OPENAI_API_KEY = form_data.openai.OPENAI_API_KEY
+    app.state.config.AUTOMATIC1111_BASE_URL = (
+        form_data.automatic1111.AUTOMATIC1111_BASE_URL
+    )
+    app.state.config.AUTOMATIC1111_API_AUTH = (
+        form_data.automatic1111.AUTOMATIC1111_API_AUTH
+    )
+    app.state.config.AUTOMATIC1111_CFG_SCALE = (
+        float(form_data.automatic1111.AUTOMATIC1111_CFG_SCALE)
+        if form_data.automatic1111.AUTOMATIC1111_CFG_SCALE
+        else None
+    )
+    app.state.config.AUTOMATIC1111_SAMPLER = (
+        form_data.automatic1111.AUTOMATIC1111_SAMPLER
+        if form_data.automatic1111.AUTOMATIC1111_SAMPLER
+        else None
+    )
+    app.state.config.AUTOMATIC1111_SCHEDULER = (
+        form_data.automatic1111.AUTOMATIC1111_SCHEDULER
+        if form_data.automatic1111.AUTOMATIC1111_SCHEDULER
+        else None
+    )
+    app.state.config.COMFYUI_BASE_URL = form_data.comfyui.COMFYUI_BASE_URL.strip("/")
+    app.state.config.COMFYUI_WORKFLOW = form_data.comfyui.COMFYUI_WORKFLOW
+    app.state.config.COMFYUI_WORKFLOW_NODES = form_data.comfyui.COMFYUI_WORKFLOW_NODES
+    return {
+        "enabled": app.state.config.ENABLED,
+        "engine": app.state.config.ENGINE,
+        "openai": {
+            "OPENAI_API_BASE_URL": app.state.config.OPENAI_API_BASE_URL,
+            "OPENAI_API_KEY": app.state.config.OPENAI_API_KEY,
+        },
+        "automatic1111": {
+            "AUTOMATIC1111_BASE_URL": app.state.config.AUTOMATIC1111_BASE_URL,
+            "AUTOMATIC1111_API_AUTH": app.state.config.AUTOMATIC1111_API_AUTH,
+            "AUTOMATIC1111_CFG_SCALE": app.state.config.AUTOMATIC1111_CFG_SCALE,
+            "AUTOMATIC1111_SAMPLER": app.state.config.AUTOMATIC1111_SAMPLER,
+            "AUTOMATIC1111_SCHEDULER": app.state.config.AUTOMATIC1111_SCHEDULER,
+        },
+        "comfyui": {
+            "COMFYUI_BASE_URL": app.state.config.COMFYUI_BASE_URL,
+            "COMFYUI_WORKFLOW": app.state.config.COMFYUI_WORKFLOW,
+            "COMFYUI_WORKFLOW_NODES": app.state.config.COMFYUI_WORKFLOW_NODES,
+        },
+    }
+def get_automatic1111_api_auth():
+    if app.state.config.AUTOMATIC1111_API_AUTH is None:
+        return ""
+    else:
+        auth1111_byte_string = app.state.config.AUTOMATIC1111_API_AUTH.encode("utf-8")
+        auth1111_base64_encoded_bytes = base64.b64encode(auth1111_byte_string)
+        auth1111_base64_encoded_string = auth1111_base64_encoded_bytes.decode("utf-8")
+        return f"Basic {auth1111_base64_encoded_string}"
+@app.get("/config/url/verify")
+async def verify_url(user=Depends(get_admin_user)):
+    if app.state.config.ENGINE == "automatic1111":
+        try:
+            r = requests.get(
+                url=f"{app.state.config.AUTOMATIC1111_BASE_URL}/sdapi/v1/options",
+                headers={"authorization": get_automatic1111_api_auth()},
+            )
+            r.raise_for_status()
+            return True
+        except Exception:
+            app.state.config.ENABLED = False
+            raise HTTPException(status_code=400, detail=ERROR_MESSAGES.INVALID_URL)
+    elif app.state.config.ENGINE == "comfyui":
+        try:
+            r = requests.get(url=f"{app.state.config.COMFYUI_BASE_URL}/object_info")
+            r.raise_for_status()
+            return True
+        except Exception:
+            app.state.config.ENABLED = False
+            raise HTTPException(status_code=400, detail=ERROR_MESSAGES.INVALID_URL)
+    else:
+        return True
+def set_image_model(model: str):
+    log.info(f"Setting image model to {model}")
+    app.state.config.MODEL = model
+    if app.state.config.ENGINE in ["", "automatic1111"]:
+        api_auth = get_automatic1111_api_auth()
+        r = requests.get(
+            url=f"{app.state.config.AUTOMATIC1111_BASE_URL}/sdapi/v1/options",
+            headers={"authorization": api_auth},
+        )
+        options = r.json()
+        if model != options["sd_model_checkpoint"]:
+            options["sd_model_checkpoint"] = model
+            r = requests.post(
+                url=f"{app.state.config.AUTOMATIC1111_BASE_URL}/sdapi/v1/options",
+                json=options,
+                headers={"authorization": api_auth},
+            )
+    return app.state.config.MODEL
+def get_image_model():
+    if app.state.config.ENGINE == "openai":
+        return app.state.config.MODEL if app.state.config.MODEL else "dall-e-2"
+    elif app.state.config.ENGINE == "comfyui":
+        return app.state.config.MODEL if app.state.config.MODEL else ""
+    elif app.state.config.ENGINE == "automatic1111" or app.state.config.ENGINE == "":
+        try:
+            r = requests.get(
+                url=f"{app.state.config.AUTOMATIC1111_BASE_URL}/sdapi/v1/options",
+                headers={"authorization": get_automatic1111_api_auth()},
+            )
+            options = r.json()
+            return options["sd_model_checkpoint"]
+        except Exception as e:
+            app.state.config.ENABLED = False
+            raise HTTPException(status_code=400, detail=ERROR_MESSAGES.DEFAULT(e))
+class ImageConfigForm(BaseModel):
+    MODEL: str
+    IMAGE_SIZE: str
+    IMAGE_STEPS: int
+@app.get("/image/config")
+async def get_image_config(user=Depends(get_admin_user)):
+    return {
+        "MODEL": app.state.config.MODEL,
+        "IMAGE_SIZE": app.state.config.IMAGE_SIZE,
+        "IMAGE_STEPS": app.state.config.IMAGE_STEPS,
+    }
+@app.post("/image/config/update")
+async def update_image_config(form_data: ImageConfigForm, user=Depends(get_admin_user)):
+    set_image_model(form_data.MODEL)
+    pattern = r"^\d+x\d+$"
+    if re.match(pattern, form_data.IMAGE_SIZE):
+        app.state.config.IMAGE_SIZE = form_data.IMAGE_SIZE
+    else:
+        raise HTTPException(
+            status_code=400,
+            detail=ERROR_MESSAGES.INCORRECT_FORMAT("  (e.g., 512x512)."),
+        )
+    if form_data.IMAGE_STEPS >= 0:
+        app.state.config.IMAGE_STEPS = form_data.IMAGE_STEPS
+    else:
+        raise HTTPException(
+            status_code=400,
+            detail=ERROR_MESSAGES.INCORRECT_FORMAT("  (e.g., 50)."),
+        )
+    return {
+        "MODEL": app.state.config.MODEL,
+        "IMAGE_SIZE": app.state.config.IMAGE_SIZE,
+        "IMAGE_STEPS": app.state.config.IMAGE_STEPS,
+    }
+@app.get("/models")
+def get_models(user=Depends(get_verified_user)):
+    try:
+        if app.state.config.ENGINE == "openai":
+            return [
+                {"id": "dall-e-2", "name": "DALL·E 2"},
+                {"id": "dall-e-3", "name": "DALL·E 3"},
+            ]
+        elif app.state.config.ENGINE == "comfyui":
+            # TODO - get models from comfyui
+            r = requests.get(url=f"{app.state.config.COMFYUI_BASE_URL}/object_info")
+            info = r.json()
+            workflow = json.loads(app.state.config.COMFYUI_WORKFLOW)
+            model_node_id = None
+            for node in app.state.config.COMFYUI_WORKFLOW_NODES:
+                if node["type"] == "model":
+                    if node["node_ids"]:
+                        model_node_id = node["node_ids"][0]
+                    break
+            if model_node_id:
+                model_list_key = None
+                print(workflow[model_node_id]["class_type"])
+                for key in info[workflow[model_node_id]["class_type"]]["input"][
+                    "required"
+                ]:
+                    if "_name" in key:
+                        model_list_key = key
+                        break
+                if model_list_key:
+                    return list(
+                        map(
+                            lambda model: {"id": model, "name": model},
+                            info[workflow[model_node_id]["class_type"]]["input"][
+                                "required"
+                            ][model_list_key][0],
+                        )
+                    )
+            else:
+                return list(
+                    map(
+                        lambda model: {"id": model, "name": model},
+                        info["CheckpointLoaderSimple"]["input"]["required"][
+                            "ckpt_name"
+                        ][0],
+                    )
+                )
+        elif (
+            app.state.config.ENGINE == "automatic1111" or app.state.config.ENGINE == ""
+        ):
+            r = requests.get(
+                url=f"{app.state.config.AUTOMATIC1111_BASE_URL}/sdapi/v1/sd-models",
+                headers={"authorization": get_automatic1111_api_auth()},
+            )
+            models = r.json()
+            return list(
+                map(
+                    lambda model: {"id": model["title"], "name": model["model_name"]},
+                    models,
+                )
+            )
+    except Exception as e:
+        app.state.config.ENABLED = False
+        raise HTTPException(status_code=400, detail=ERROR_MESSAGES.DEFAULT(e))
+class GenerateImageForm(BaseModel):
+    model: Optional[str] = None
+    prompt: str
+    size: Optional[str] = None
+    n: int = 1
+    negative_prompt: Optional[str] = None
+def save_b64_image(b64_str):
+    try:
+        image_id = str(uuid.uuid4())
+        if "," in b64_str:
+            header, encoded = b64_str.split(",", 1)
+            mime_type = header.split(";")[0]
+            img_data = base64.b64decode(encoded)
+            image_format = mimetypes.guess_extension(mime_type)
+            image_filename = f"{image_id}{image_format}"
+            file_path = IMAGE_CACHE_DIR / f"{image_filename}"
+            with open(file_path, "wb") as f:
+                f.write(img_data)
+            return image_filename
+        else:
+            image_filename = f"{image_id}.png"
+            file_path = IMAGE_CACHE_DIR.joinpath(image_filename)
+            img_data = base64.b64decode(b64_str)
+            # Write the image data to a file
+            with open(file_path, "wb") as f:
+                f.write(img_data)
+            return image_filename
+    except Exception as e:
+        log.exception(f"Error saving image: {e}")
+        return None
+def save_url_image(url):
+    image_id = str(uuid.uuid4())
+    try:
+        r = requests.get(url)
+        r.raise_for_status()
+        if r.headers["content-type"].split("/")[0] == "image":
+            mime_type = r.headers["content-type"]
+            image_format = mimetypes.guess_extension(mime_type)
+            if not image_format:
+                raise ValueError("Could not determine image type from MIME type")
+            image_filename = f"{image_id}{image_format}"
+            file_path = IMAGE_CACHE_DIR.joinpath(f"{image_filename}")
+            with open(file_path, "wb") as image_file:
+                for chunk in r.iter_content(chunk_size=8192):
+                    image_file.write(chunk)
+            return image_filename
+        else:
+            log.error("Url does not point to an image.")
+            return None
+    except Exception as e:
+        log.exception(f"Error saving image: {e}")
+        return None
+@app.post("/generations")
+async def image_generations(
+    form_data: GenerateImageForm,
+    user=Depends(get_verified_user),
+):
+    width, height = tuple(map(int, app.state.config.IMAGE_SIZE.split("x")))
+    r = None
+    try:
+        if app.state.config.ENGINE == "openai":
+            headers = {}
+            headers["Authorization"] = f"Bearer {app.state.config.OPENAI_API_KEY}"
+            headers["Content-Type"] = "application/json"
+            if ENABLE_FORWARD_USER_INFO_HEADERS:
+                headers["X-OpenWebUI-User-Name"] = user.name
+                headers["X-OpenWebUI-User-Id"] = user.id
+                headers["X-OpenWebUI-User-Email"] = user.email
+                headers["X-OpenWebUI-User-Role"] = user.role
+            data = {
+                "model": (
+                    app.state.config.MODEL
+                    if app.state.config.MODEL != ""
+                    else "dall-e-2"
+                ),
+                "prompt": form_data.prompt,
+                "n": form_data.n,
+                "size": (
+                    form_data.size if form_data.size else app.state.config.IMAGE_SIZE
+                ),
+                "response_format": "b64_json",
+            }
+            # Use asyncio.to_thread for the requests.post call
+            r = await asyncio.to_thread(
+                requests.post,
+                url=f"{app.state.config.OPENAI_API_BASE_URL}/images/generations",
+                json=data,
+                headers=headers,
+            )
+            r.raise_for_status()
+            res = r.json()
+            images = []
+            for image in res["data"]:
+                image_filename = save_b64_image(image["b64_json"])
+                images.append({"url": f"/cache/image/generations/{image_filename}"})
+                file_body_path = IMAGE_CACHE_DIR.joinpath(f"{image_filename}.json")
+                with open(file_body_path, "w") as f:
+                    json.dump(data, f)
+            return images
+        elif app.state.config.ENGINE == "comfyui":
+            data = {
+                "prompt": form_data.prompt,
+                "width": width,
+                "height": height,
+                "n": form_data.n,
+            }
+            if app.state.config.IMAGE_STEPS is not None:
+                data["steps"] = app.state.config.IMAGE_STEPS
+            if form_data.negative_prompt is not None:
+                data["negative_prompt"] = form_data.negative_prompt
+            form_data = ComfyUIGenerateImageForm(
+                **{
+                    "workflow": ComfyUIWorkflow(
+                        **{
+                            "workflow": app.state.config.COMFYUI_WORKFLOW,
+                            "nodes": app.state.config.COMFYUI_WORKFLOW_NODES,
+                        }
+                    ),
+                    **data,
+                }
+            )
+            res = await comfyui_generate_image(
+                app.state.config.MODEL,
+                form_data,
+                user.id,
+                app.state.config.COMFYUI_BASE_URL,
+            )
+            log.debug(f"res: {res}")
+            images = []
+            for image in res["data"]:
+                image_filename = save_url_image(image["url"])
+                images.append({"url": f"/cache/image/generations/{image_filename}"})
+                file_body_path = IMAGE_CACHE_DIR.joinpath(f"{image_filename}.json")
+                with open(file_body_path, "w") as f:
+                    json.dump(form_data.model_dump(exclude_none=True), f)
+            log.debug(f"images: {images}")
+            return images
+        elif (
+            app.state.config.ENGINE == "automatic1111" or app.state.config.ENGINE == ""
+        ):
+            if form_data.model:
+                set_image_model(form_data.model)
+            data = {
+                "prompt": form_data.prompt,
+                "batch_size": form_data.n,
+                "width": width,
+                "height": height,
+            }
+            if app.state.config.IMAGE_STEPS is not None:
+                data["steps"] = app.state.config.IMAGE_STEPS
+            if form_data.negative_prompt is not None:
+                data["negative_prompt"] = form_data.negative_prompt
+            if app.state.config.AUTOMATIC1111_CFG_SCALE:
+                data["cfg_scale"] = app.state.config.AUTOMATIC1111_CFG_SCALE
+            if app.state.config.AUTOMATIC1111_SAMPLER:
+                data["sampler_name"] = app.state.config.AUTOMATIC1111_SAMPLER
+            if app.state.config.AUTOMATIC1111_SCHEDULER:
+                data["scheduler"] = app.state.config.AUTOMATIC1111_SCHEDULER
+            # Use asyncio.to_thread for the requests.post call
+            r = await asyncio.to_thread(
+                requests.post,
+                url=f"{app.state.config.AUTOMATIC1111_BASE_URL}/sdapi/v1/txt2img",
+                json=data,
+                headers={"authorization": get_automatic1111_api_auth()},
+            )
+            res = r.json()
+            log.debug(f"res: {res}")
+            images = []
+            for image in res["images"]:
+                image_filename = save_b64_image(image)
+                images.append({"url": f"/cache/image/generations/{image_filename}"})
+                file_body_path = IMAGE_CACHE_DIR.joinpath(f"{image_filename}.json")
+                with open(file_body_path, "w") as f:
+                    json.dump({**data, "info": res["info"]}, f)
+            return images
+    except Exception as e:
+        error = e
+        if r != None:
+            data = r.json()
+            if "error" in data:
+                error = data["error"]["message"]
+        raise HTTPException(status_code=400, detail=ERROR_MESSAGES.DEFAULT(error))

backend/open_webui/apps/images/utils/comfyui.py ADDED Viewed

	@@ -0,0 +1,186 @@

+import asyncio
+import json
+import logging
+import random
+import urllib.parse
+import urllib.request
+from typing import Optional
+import websocket  # NOTE: websocket-client (https://github.com/websocket-client/websocket-client)
+from open_webui.env import SRC_LOG_LEVELS
+from pydantic import BaseModel
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["COMFYUI"])
+default_headers = {"User-Agent": "Mozilla/5.0"}
+def queue_prompt(prompt, client_id, base_url):
+    log.info("queue_prompt")
+    p = {"prompt": prompt, "client_id": client_id}
+    data = json.dumps(p).encode("utf-8")
+    log.debug(f"queue_prompt data: {data}")
+    try:
+        req = urllib.request.Request(
+            f"{base_url}/prompt", data=data, headers=default_headers
+        )
+        response = urllib.request.urlopen(req).read()
+        return json.loads(response)
+    except Exception as e:
+        log.exception(f"Error while queuing prompt: {e}")
+        raise e
+def get_image(filename, subfolder, folder_type, base_url):
+    log.info("get_image")
+    data = {"filename": filename, "subfolder": subfolder, "type": folder_type}
+    url_values = urllib.parse.urlencode(data)
+    req = urllib.request.Request(
+        f"{base_url}/view?{url_values}", headers=default_headers
+    )
+    with urllib.request.urlopen(req) as response:
+        return response.read()
+def get_image_url(filename, subfolder, folder_type, base_url):
+    log.info("get_image")
+    data = {"filename": filename, "subfolder": subfolder, "type": folder_type}
+    url_values = urllib.parse.urlencode(data)
+    return f"{base_url}/view?{url_values}"
+def get_history(prompt_id, base_url):
+    log.info("get_history")
+    req = urllib.request.Request(
+        f"{base_url}/history/{prompt_id}", headers=default_headers
+    )
+    with urllib.request.urlopen(req) as response:
+        return json.loads(response.read())
+def get_images(ws, prompt, client_id, base_url):
+    prompt_id = queue_prompt(prompt, client_id, base_url)["prompt_id"]
+    output_images = []
+    while True:
+        out = ws.recv()
+        if isinstance(out, str):
+            message = json.loads(out)
+            if message["type"] == "executing":
+                data = message["data"]
+                if data["node"] is None and data["prompt_id"] == prompt_id:
+                    break  # Execution is done
+        else:
+            continue  # previews are binary data
+    history = get_history(prompt_id, base_url)[prompt_id]
+    for o in history["outputs"]:
+        for node_id in history["outputs"]:
+            node_output = history["outputs"][node_id]
+            if "images" in node_output:
+                for image in node_output["images"]:
+                    url = get_image_url(
+                        image["filename"], image["subfolder"], image["type"], base_url
+                    )
+                    output_images.append({"url": url})
+    return {"data": output_images}
+class ComfyUINodeInput(BaseModel):
+    type: Optional[str] = None
+    node_ids: list[str] = []
+    key: Optional[str] = "text"
+    value: Optional[str] = None
+class ComfyUIWorkflow(BaseModel):
+    workflow: str
+    nodes: list[ComfyUINodeInput]
+class ComfyUIGenerateImageForm(BaseModel):
+    workflow: ComfyUIWorkflow
+    prompt: str
+    negative_prompt: Optional[str] = None
+    width: int
+    height: int
+    n: int = 1
+    steps: Optional[int] = None
+    seed: Optional[int] = None
+async def comfyui_generate_image(
+    model: str, payload: ComfyUIGenerateImageForm, client_id, base_url
+):
+    ws_url = base_url.replace("http://", "ws://").replace("https://", "wss://")
+    workflow = json.loads(payload.workflow.workflow)
+    for node in payload.workflow.nodes:
+        if node.type:
+            if node.type == "model":
+                for node_id in node.node_ids:
+                    workflow[node_id]["inputs"][node.key] = model
+            elif node.type == "prompt":
+                for node_id in node.node_ids:
+                    workflow[node_id]["inputs"][
+                        node.key if node.key else "text"
+                    ] = payload.prompt
+            elif node.type == "negative_prompt":
+                for node_id in node.node_ids:
+                    workflow[node_id]["inputs"][
+                        node.key if node.key else "text"
+                    ] = payload.negative_prompt
+            elif node.type == "width":
+                for node_id in node.node_ids:
+                    workflow[node_id]["inputs"][
+                        node.key if node.key else "width"
+                    ] = payload.width
+            elif node.type == "height":
+                for node_id in node.node_ids:
+                    workflow[node_id]["inputs"][
+                        node.key if node.key else "height"
+                    ] = payload.height
+            elif node.type == "n":
+                for node_id in node.node_ids:
+                    workflow[node_id]["inputs"][
+                        node.key if node.key else "batch_size"
+                    ] = payload.n
+            elif node.type == "steps":
+                for node_id in node.node_ids:
+                    workflow[node_id]["inputs"][
+                        node.key if node.key else "steps"
+                    ] = payload.steps
+            elif node.type == "seed":
+                seed = (
+                    payload.seed
+                    if payload.seed
+                    else random.randint(0, 18446744073709551614)
+                )
+                for node_id in node.node_ids:
+                    workflow[node_id]["inputs"][node.key] = seed
+        else:
+            for node_id in node.node_ids:
+                workflow[node_id]["inputs"][node.key] = node.value
+    try:
+        ws = websocket.WebSocket()
+        ws.connect(f"{ws_url}/ws?clientId={client_id}")
+        log.info("WebSocket connection established.")
+    except Exception as e:
+        log.exception(f"Failed to connect to WebSocket server: {e}")
+        return None
+    try:
+        log.info("Sending workflow to WebSocket server.")
+        log.info(f"Workflow: {workflow}")
+        images = await asyncio.to_thread(get_images, ws, workflow, client_id, base_url)
+    except Exception as e:
+        log.exception(f"Error while receiving images: {e}")
+        images = None
+    ws.close()
+    return images

backend/open_webui/apps/ollama/main.py ADDED Viewed

	@@ -0,0 +1,1350 @@

+import asyncio
+import json
+import logging
+import os
+import random
+import re
+import time
+from typing import Optional, Union
+from urllib.parse import urlparse
+import aiohttp
+from aiocache import cached
+import requests
+from open_webui.apps.webui.models.models import Models
+from open_webui.config import (
+    CORS_ALLOW_ORIGIN,
+    ENABLE_OLLAMA_API,
+    OLLAMA_BASE_URLS,
+    OLLAMA_API_CONFIGS,
+    UPLOAD_DIR,
+    AppConfig,
+)
+from open_webui.env import (
+    AIOHTTP_CLIENT_TIMEOUT,
+    AIOHTTP_CLIENT_TIMEOUT_OPENAI_MODEL_LIST,
+)
+from open_webui.constants import ERROR_MESSAGES
+from open_webui.env import ENV, SRC_LOG_LEVELS
+from fastapi import Depends, FastAPI, File, HTTPException, Request, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel, ConfigDict
+from starlette.background import BackgroundTask
+from open_webui.utils.misc import (
+    calculate_sha256,
+)
+from open_webui.utils.payload import (
+    apply_model_params_to_body_ollama,
+    apply_model_params_to_body_openai,
+    apply_model_system_prompt_to_body,
+)
+from open_webui.utils.utils import get_admin_user, get_verified_user
+from open_webui.utils.access_control import has_access
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["OLLAMA"])
+app = FastAPI(
+    docs_url="/docs" if ENV == "dev" else None,
+    openapi_url="/openapi.json" if ENV == "dev" else None,
+    redoc_url=None,
+)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=CORS_ALLOW_ORIGIN,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+app.state.config = AppConfig()
+app.state.config.ENABLE_OLLAMA_API = ENABLE_OLLAMA_API
+app.state.config.OLLAMA_BASE_URLS = OLLAMA_BASE_URLS
+app.state.config.OLLAMA_API_CONFIGS = OLLAMA_API_CONFIGS
+# TODO: Implement a more intelligent load balancing mechanism for distributing requests among multiple backend instances.
+# Current implementation uses a simple round-robin approach (random.choice). Consider incorporating algorithms like weighted round-robin,
+# least connections, or least response time for better resource utilization and performance optimization.
+@app.head("/")
+@app.get("/")
+async def get_status():
+    return {"status": True}
+class ConnectionVerificationForm(BaseModel):
+    url: str
+    key: Optional[str] = None
+@app.post("/verify")
+async def verify_connection(
+    form_data: ConnectionVerificationForm, user=Depends(get_admin_user)
+):
+    url = form_data.url
+    key = form_data.key
+    headers = {}
+    if key:
+        headers["Authorization"] = f"Bearer {key}"
+    timeout = aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT_OPENAI_MODEL_LIST)
+    async with aiohttp.ClientSession(timeout=timeout) as session:
+        try:
+            async with session.get(f"{url}/api/version", headers=headers) as r:
+                if r.status != 200:
+                    # Extract response error details if available
+                    error_detail = f"HTTP Error: {r.status}"
+                    res = await r.json()
+                    if "error" in res:
+                        error_detail = f"External Error: {res['error']}"
+                    raise Exception(error_detail)
+                response_data = await r.json()
+                return response_data
+        except aiohttp.ClientError as e:
+            # ClientError covers all aiohttp requests issues
+            log.exception(f"Client error: {str(e)}")
+            # Handle aiohttp-specific connection issues, timeout etc.
+            raise HTTPException(
+                status_code=500, detail="Open WebUI: Server Connection Error"
+            )
+        except Exception as e:
+            log.exception(f"Unexpected error: {e}")
+            # Generic error handler in case parsing JSON or other steps fail
+            error_detail = f"Unexpected error: {str(e)}"
+            raise HTTPException(status_code=500, detail=error_detail)
+@app.get("/config")
+async def get_config(user=Depends(get_admin_user)):
+    return {
+        "ENABLE_OLLAMA_API": app.state.config.ENABLE_OLLAMA_API,
+        "OLLAMA_BASE_URLS": app.state.config.OLLAMA_BASE_URLS,
+        "OLLAMA_API_CONFIGS": app.state.config.OLLAMA_API_CONFIGS,
+    }
+class OllamaConfigForm(BaseModel):
+    ENABLE_OLLAMA_API: Optional[bool] = None
+    OLLAMA_BASE_URLS: list[str]
+    OLLAMA_API_CONFIGS: dict
+@app.post("/config/update")
+async def update_config(form_data: OllamaConfigForm, user=Depends(get_admin_user)):
+    app.state.config.ENABLE_OLLAMA_API = form_data.ENABLE_OLLAMA_API
+    app.state.config.OLLAMA_BASE_URLS = form_data.OLLAMA_BASE_URLS
+    app.state.config.OLLAMA_API_CONFIGS = form_data.OLLAMA_API_CONFIGS
+    # Remove any extra configs
+    config_urls = app.state.config.OLLAMA_API_CONFIGS.keys()
+    for url in list(app.state.config.OLLAMA_BASE_URLS):
+        if url not in config_urls:
+            app.state.config.OLLAMA_API_CONFIGS.pop(url, None)
+    return {
+        "ENABLE_OLLAMA_API": app.state.config.ENABLE_OLLAMA_API,
+        "OLLAMA_BASE_URLS": app.state.config.OLLAMA_BASE_URLS,
+        "OLLAMA_API_CONFIGS": app.state.config.OLLAMA_API_CONFIGS,
+    }
+async def aiohttp_get(url, key=None):
+    timeout = aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT_OPENAI_MODEL_LIST)
+    try:
+        headers = {"Authorization": f"Bearer {key}"} if key else {}
+        async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
+            async with session.get(url, headers=headers) as response:
+                return await response.json()
+    except Exception as e:
+        # Handle connection error here
+        log.error(f"Connection error: {e}")
+        return None
+async def cleanup_response(
+    response: Optional[aiohttp.ClientResponse],
+    session: Optional[aiohttp.ClientSession],
+):
+    if response:
+        response.close()
+    if session:
+        await session.close()
+async def post_streaming_url(
+    url: str, payload: Union[str, bytes], stream: bool = True, content_type=None
+):
+    r = None
+    try:
+        session = aiohttp.ClientSession(
+            trust_env=True, timeout=aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT)
+        )
+        parsed_url = urlparse(url)
+        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+        api_config = app.state.config.OLLAMA_API_CONFIGS.get(base_url, {})
+        key = api_config.get("key", None)
+        headers = {"Content-Type": "application/json"}
+        if key:
+            headers["Authorization"] = f"Bearer {key}"
+        r = await session.post(
+            url,
+            data=payload,
+            headers=headers,
+        )
+        r.raise_for_status()
+        if stream:
+            response_headers = dict(r.headers)
+            if content_type:
+                response_headers["Content-Type"] = content_type
+            return StreamingResponse(
+                r.content,
+                status_code=r.status,
+                headers=response_headers,
+                background=BackgroundTask(
+                    cleanup_response, response=r, session=session
+                ),
+            )
+        else:
+            res = await r.json()
+            await cleanup_response(r, session)
+            return res
+    except Exception as e:
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = await r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except Exception:
+                error_detail = f"Ollama: {e}"
+        raise HTTPException(
+            status_code=r.status if r else 500,
+            detail=error_detail,
+        )
+def merge_models_lists(model_lists):
+    merged_models = {}
+    for idx, model_list in enumerate(model_lists):
+        if model_list is not None:
+            for model in model_list:
+                id = model["model"]
+                if id not in merged_models:
+                    model["urls"] = [idx]
+                    merged_models[id] = model
+                else:
+                    merged_models[id]["urls"].append(idx)
+    return list(merged_models.values())
+@cached(ttl=3)
+async def get_all_models():
+    log.info("get_all_models()")
+    if app.state.config.ENABLE_OLLAMA_API:
+        tasks = []
+        for idx, url in enumerate(app.state.config.OLLAMA_BASE_URLS):
+            if url not in app.state.config.OLLAMA_API_CONFIGS:
+                tasks.append(aiohttp_get(f"{url}/api/tags"))
+            else:
+                api_config = app.state.config.OLLAMA_API_CONFIGS.get(url, {})
+                enable = api_config.get("enable", True)
+                key = api_config.get("key", None)
+                if enable:
+                    tasks.append(aiohttp_get(f"{url}/api/tags", key))
+                else:
+                    tasks.append(asyncio.ensure_future(asyncio.sleep(0, None)))
+        responses = await asyncio.gather(*tasks)
+        for idx, response in enumerate(responses):
+            if response:
+                url = app.state.config.OLLAMA_BASE_URLS[idx]
+                api_config = app.state.config.OLLAMA_API_CONFIGS.get(url, {})
+                prefix_id = api_config.get("prefix_id", None)
+                model_ids = api_config.get("model_ids", [])
+                if len(model_ids) != 0 and "models" in response:
+                    response["models"] = list(
+                        filter(
+                            lambda model: model["model"] in model_ids,
+                            response["models"],
+                        )
+                    )
+                if prefix_id:
+                    for model in response.get("models", []):
+                        model["model"] = f"{prefix_id}.{model['model']}"
+        models = {
+            "models": merge_models_lists(
+                map(
+                    lambda response: response.get("models", []) if response else None,
+                    responses,
+                )
+            )
+        }
+    else:
+        models = {"models": []}
+    return models
+@app.get("/api/tags")
+@app.get("/api/tags/{url_idx}")
+async def get_ollama_tags(
+    url_idx: Optional[int] = None, user=Depends(get_verified_user)
+):
+    models = []
+    if url_idx is None:
+        models = await get_all_models()
+    else:
+        url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+        parsed_url = urlparse(url)
+        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+        api_config = app.state.config.OLLAMA_API_CONFIGS.get(base_url, {})
+        key = api_config.get("key", None)
+        headers = {}
+        if key:
+            headers["Authorization"] = f"Bearer {key}"
+        r = None
+        try:
+            r = requests.request(method="GET", url=f"{url}/api/tags", headers=headers)
+            r.raise_for_status()
+            models = r.json()
+        except Exception as e:
+            log.exception(e)
+            error_detail = "Open WebUI: Server Connection Error"
+            if r is not None:
+                try:
+                    res = r.json()
+                    if "error" in res:
+                        error_detail = f"Ollama: {res['error']}"
+                except Exception:
+                    error_detail = f"Ollama: {e}"
+            raise HTTPException(
+                status_code=r.status_code if r else 500,
+                detail=error_detail,
+            )
+    if user.role == "user":
+        # Filter models based on user access control
+        filtered_models = []
+        for model in models.get("models", []):
+            model_info = Models.get_model_by_id(model["model"])
+            if model_info:
+                if user.id == model_info.user_id or has_access(
+                    user.id, type="read", access_control=model_info.access_control
+                ):
+                    filtered_models.append(model)
+        models["models"] = filtered_models
+    return models
+@app.get("/api/version")
+@app.get("/api/version/{url_idx}")
+async def get_ollama_versions(url_idx: Optional[int] = None):
+    if app.state.config.ENABLE_OLLAMA_API:
+        if url_idx is None:
+            # returns lowest version
+            tasks = [
+                aiohttp_get(
+                    f"{url}/api/version",
+                    app.state.config.OLLAMA_API_CONFIGS.get(url, {}).get("key", None),
+                )
+                for url in app.state.config.OLLAMA_BASE_URLS
+            ]
+            responses = await asyncio.gather(*tasks)
+            responses = list(filter(lambda x: x is not None, responses))
+            if len(responses) > 0:
+                lowest_version = min(
+                    responses,
+                    key=lambda x: tuple(
+                        map(int, re.sub(r"^v|-.*", "", x["version"]).split("."))
+                    ),
+                )
+                return {"version": lowest_version["version"]}
+            else:
+                raise HTTPException(
+                    status_code=500,
+                    detail=ERROR_MESSAGES.OLLAMA_NOT_FOUND,
+                )
+        else:
+            url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+            r = None
+            try:
+                r = requests.request(method="GET", url=f"{url}/api/version")
+                r.raise_for_status()
+                return r.json()
+            except Exception as e:
+                log.exception(e)
+                error_detail = "Open WebUI: Server Connection Error"
+                if r is not None:
+                    try:
+                        res = r.json()
+                        if "error" in res:
+                            error_detail = f"Ollama: {res['error']}"
+                    except Exception:
+                        error_detail = f"Ollama: {e}"
+                raise HTTPException(
+                    status_code=r.status_code if r else 500,
+                    detail=error_detail,
+                )
+    else:
+        return {"version": False}
+class ModelNameForm(BaseModel):
+    name: str
+@app.post("/api/pull")
+@app.post("/api/pull/{url_idx}")
+async def pull_model(
+    form_data: ModelNameForm, url_idx: int = 0, user=Depends(get_admin_user)
+):
+    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+    log.info(f"url: {url}")
+    # Admin should be able to pull models from any source
+    payload = {**form_data.model_dump(exclude_none=True), "insecure": True}
+    return await post_streaming_url(f"{url}/api/pull", json.dumps(payload))
+class PushModelForm(BaseModel):
+    name: str
+    insecure: Optional[bool] = None
+    stream: Optional[bool] = None
+@app.delete("/api/push")
+@app.delete("/api/push/{url_idx}")
+async def push_model(
+    form_data: PushModelForm,
+    url_idx: Optional[int] = None,
+    user=Depends(get_admin_user),
+):
+    if url_idx is None:
+        model_list = await get_all_models()
+        models = {model["model"]: model for model in model_list["models"]}
+        if form_data.name in models:
+            url_idx = models[form_data.name]["urls"][0]
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.name),
+            )
+    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+    log.debug(f"url: {url}")
+    return await post_streaming_url(
+        f"{url}/api/push", form_data.model_dump_json(exclude_none=True).encode()
+    )
+class CreateModelForm(BaseModel):
+    name: str
+    modelfile: Optional[str] = None
+    stream: Optional[bool] = None
+    path: Optional[str] = None
+@app.post("/api/create")
+@app.post("/api/create/{url_idx}")
+async def create_model(
+    form_data: CreateModelForm, url_idx: int = 0, user=Depends(get_admin_user)
+):
+    log.debug(f"form_data: {form_data}")
+    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+    log.info(f"url: {url}")
+    return await post_streaming_url(
+        f"{url}/api/create", form_data.model_dump_json(exclude_none=True).encode()
+    )
+class CopyModelForm(BaseModel):
+    source: str
+    destination: str
+@app.post("/api/copy")
+@app.post("/api/copy/{url_idx}")
+async def copy_model(
+    form_data: CopyModelForm,
+    url_idx: Optional[int] = None,
+    user=Depends(get_admin_user),
+):
+    if url_idx is None:
+        model_list = await get_all_models()
+        models = {model["model"]: model for model in model_list["models"]}
+        if form_data.source in models:
+            url_idx = models[form_data.source]["urls"][0]
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.source),
+            )
+    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+    log.info(f"url: {url}")
+    parsed_url = urlparse(url)
+    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+    api_config = app.state.config.OLLAMA_API_CONFIGS.get(base_url, {})
+    key = api_config.get("key", None)
+    headers = {"Content-Type": "application/json"}
+    if key:
+        headers["Authorization"] = f"Bearer {key}"
+    r = requests.request(
+        method="POST",
+        url=f"{url}/api/copy",
+        headers=headers,
+        data=form_data.model_dump_json(exclude_none=True).encode(),
+    )
+    try:
+        r.raise_for_status()
+        log.debug(f"r.text: {r.text}")
+        return True
+    except Exception as e:
+        log.exception(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except Exception:
+                error_detail = f"Ollama: {e}"
+        raise HTTPException(
+            status_code=r.status_code if r else 500,
+            detail=error_detail,
+        )
+@app.delete("/api/delete")
+@app.delete("/api/delete/{url_idx}")
+async def delete_model(
+    form_data: ModelNameForm,
+    url_idx: Optional[int] = None,
+    user=Depends(get_admin_user),
+):
+    if url_idx is None:
+        model_list = await get_all_models()
+        models = {model["model"]: model for model in model_list["models"]}
+        if form_data.name in models:
+            url_idx = models[form_data.name]["urls"][0]
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.name),
+            )
+    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+    log.info(f"url: {url}")
+    parsed_url = urlparse(url)
+    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+    api_config = app.state.config.OLLAMA_API_CONFIGS.get(base_url, {})
+    key = api_config.get("key", None)
+    headers = {"Content-Type": "application/json"}
+    if key:
+        headers["Authorization"] = f"Bearer {key}"
+    r = requests.request(
+        method="DELETE",
+        url=f"{url}/api/delete",
+        data=form_data.model_dump_json(exclude_none=True).encode(),
+        headers=headers,
+    )
+    try:
+        r.raise_for_status()
+        log.debug(f"r.text: {r.text}")
+        return True
+    except Exception as e:
+        log.exception(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except Exception:
+                error_detail = f"Ollama: {e}"
+        raise HTTPException(
+            status_code=r.status_code if r else 500,
+            detail=error_detail,
+        )
+@app.post("/api/show")
+async def show_model_info(form_data: ModelNameForm, user=Depends(get_verified_user)):
+    model_list = await get_all_models()
+    models = {model["model"]: model for model in model_list["models"]}
+    if form_data.name not in models:
+        raise HTTPException(
+            status_code=400,
+            detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.name),
+        )
+    url_idx = random.choice(models[form_data.name]["urls"])
+    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+    log.info(f"url: {url}")
+    parsed_url = urlparse(url)
+    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+    api_config = app.state.config.OLLAMA_API_CONFIGS.get(base_url, {})
+    key = api_config.get("key", None)
+    headers = {"Content-Type": "application/json"}
+    if key:
+        headers["Authorization"] = f"Bearer {key}"
+    r = requests.request(
+        method="POST",
+        url=f"{url}/api/show",
+        headers=headers,
+        data=form_data.model_dump_json(exclude_none=True).encode(),
+    )
+    try:
+        r.raise_for_status()
+        return r.json()
+    except Exception as e:
+        log.exception(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except Exception:
+                error_detail = f"Ollama: {e}"
+        raise HTTPException(
+            status_code=r.status_code if r else 500,
+            detail=error_detail,
+        )
+class GenerateEmbeddingsForm(BaseModel):
+    model: str
+    prompt: str
+    options: Optional[dict] = None
+    keep_alive: Optional[Union[int, str]] = None
+class GenerateEmbedForm(BaseModel):
+    model: str
+    input: list[str] | str
+    truncate: Optional[bool] = None
+    options: Optional[dict] = None
+    keep_alive: Optional[Union[int, str]] = None
+@app.post("/api/embed")
+@app.post("/api/embed/{url_idx}")
+async def generate_embeddings(
+    form_data: GenerateEmbedForm,
+    url_idx: Optional[int] = None,
+    user=Depends(get_verified_user),
+):
+    return await generate_ollama_batch_embeddings(form_data, url_idx)
+@app.post("/api/embeddings")
+@app.post("/api/embeddings/{url_idx}")
+async def generate_embeddings(
+    form_data: GenerateEmbeddingsForm,
+    url_idx: Optional[int] = None,
+    user=Depends(get_verified_user),
+):
+    return await generate_ollama_embeddings(form_data=form_data, url_idx=url_idx)
+async def generate_ollama_embeddings(
+    form_data: GenerateEmbeddingsForm,
+    url_idx: Optional[int] = None,
+):
+    log.info(f"generate_ollama_embeddings {form_data}")
+    if url_idx is None:
+        model_list = await get_all_models()
+        models = {model["model"]: model for model in model_list["models"]}
+        model = form_data.model
+        if ":" not in model:
+            model = f"{model}:latest"
+        if model in models:
+            url_idx = random.choice(models[model]["urls"])
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
+            )
+    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+    log.info(f"url: {url}")
+    parsed_url = urlparse(url)
+    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+    api_config = app.state.config.OLLAMA_API_CONFIGS.get(base_url, {})
+    key = api_config.get("key", None)
+    headers = {"Content-Type": "application/json"}
+    if key:
+        headers["Authorization"] = f"Bearer {key}"
+    r = requests.request(
+        method="POST",
+        url=f"{url}/api/embeddings",
+        headers=headers,
+        data=form_data.model_dump_json(exclude_none=True).encode(),
+    )
+    try:
+        r.raise_for_status()
+        data = r.json()
+        log.info(f"generate_ollama_embeddings {data}")
+        if "embedding" in data:
+            return data
+        else:
+            raise Exception("Something went wrong :/")
+    except Exception as e:
+        log.exception(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except Exception:
+                error_detail = f"Ollama: {e}"
+        raise HTTPException(
+            status_code=r.status_code if r else 500,
+            detail=error_detail,
+        )
+async def generate_ollama_batch_embeddings(
+    form_data: GenerateEmbedForm,
+    url_idx: Optional[int] = None,
+):
+    log.info(f"generate_ollama_batch_embeddings {form_data}")
+    if url_idx is None:
+        model_list = await get_all_models()
+        models = {model["model"]: model for model in model_list["models"]}
+        model = form_data.model
+        if ":" not in model:
+            model = f"{model}:latest"
+        if model in models:
+            url_idx = random.choice(models[model]["urls"])
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
+            )
+    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+    log.info(f"url: {url}")
+    parsed_url = urlparse(url)
+    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+    api_config = app.state.config.OLLAMA_API_CONFIGS.get(base_url, {})
+    key = api_config.get("key", None)
+    headers = {"Content-Type": "application/json"}
+    if key:
+        headers["Authorization"] = f"Bearer {key}"
+    r = requests.request(
+        method="POST",
+        url=f"{url}/api/embed",
+        headers=headers,
+        data=form_data.model_dump_json(exclude_none=True).encode(),
+    )
+    try:
+        r.raise_for_status()
+        data = r.json()
+        log.info(f"generate_ollama_batch_embeddings {data}")
+        if "embeddings" in data:
+            return data
+        else:
+            raise Exception("Something went wrong :/")
+    except Exception as e:
+        log.exception(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except Exception:
+                error_detail = f"Ollama: {e}"
+        raise Exception(error_detail)
+class GenerateCompletionForm(BaseModel):
+    model: str
+    prompt: str
+    suffix: Optional[str] = None
+    images: Optional[list[str]] = None
+    format: Optional[str] = None
+    options: Optional[dict] = None
+    system: Optional[str] = None
+    template: Optional[str] = None
+    context: Optional[list[int]] = None
+    stream: Optional[bool] = True
+    raw: Optional[bool] = None
+    keep_alive: Optional[Union[int, str]] = None
+@app.post("/api/generate")
+@app.post("/api/generate/{url_idx}")
+async def generate_completion(
+    form_data: GenerateCompletionForm,
+    url_idx: Optional[int] = None,
+    user=Depends(get_verified_user),
+):
+    if url_idx is None:
+        model_list = await get_all_models()
+        models = {model["model"]: model for model in model_list["models"]}
+        model = form_data.model
+        if ":" not in model:
+            model = f"{model}:latest"
+        if model in models:
+            url_idx = random.choice(models[model]["urls"])
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
+            )
+    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+    api_config = app.state.config.OLLAMA_API_CONFIGS.get(url, {})
+    prefix_id = api_config.get("prefix_id", None)
+    if prefix_id:
+        form_data.model = form_data.model.replace(f"{prefix_id}.", "")
+    log.info(f"url: {url}")
+    return await post_streaming_url(
+        f"{url}/api/generate", form_data.model_dump_json(exclude_none=True).encode()
+    )
+class ChatMessage(BaseModel):
+    role: str
+    content: str
+    images: Optional[list[str]] = None
+class GenerateChatCompletionForm(BaseModel):
+    model: str
+    messages: list[ChatMessage]
+    format: Optional[str] = None
+    options: Optional[dict] = None
+    template: Optional[str] = None
+    stream: Optional[bool] = True
+    keep_alive: Optional[Union[int, str]] = None
+async def get_ollama_url(url_idx: Optional[int], model: str):
+    if url_idx is None:
+        model_list = await get_all_models()
+        models = {model["model"]: model for model in model_list["models"]}
+        if model not in models:
+            raise HTTPException(
+                status_code=400,
+                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(model),
+            )
+        url_idx = random.choice(models[model]["urls"])
+    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+    return url
+@app.post("/api/chat")
+@app.post("/api/chat/{url_idx}")
+async def generate_chat_completion(
+    form_data: GenerateChatCompletionForm,
+    url_idx: Optional[int] = None,
+    user=Depends(get_verified_user),
+    bypass_filter: Optional[bool] = False,
+):
+    payload = {**form_data.model_dump(exclude_none=True)}
+    log.debug(f"generate_chat_completion() - 1.payload = {payload}")
+    if "metadata" in payload:
+        del payload["metadata"]
+    model_id = payload["model"]
+    model_info = Models.get_model_by_id(model_id)
+    if model_info:
+        if model_info.base_model_id:
+            payload["model"] = model_info.base_model_id
+        params = model_info.params.model_dump()
+        if params:
+            if payload.get("options") is None:
+                payload["options"] = {}
+            payload["options"] = apply_model_params_to_body_ollama(
+                params, payload["options"]
+            )
+            payload = apply_model_system_prompt_to_body(params, payload, user)
+        # Check if user has access to the model
+        if not bypass_filter and user.role == "user":
+            if not (
+                user.id == model_info.user_id
+                or has_access(
+                    user.id, type="read", access_control=model_info.access_control
+                )
+            ):
+                raise HTTPException(
+                    status_code=403,
+                    detail="Model not found",
+                )
+    elif not bypass_filter:
+        if user.role != "admin":
+            raise HTTPException(
+                status_code=403,
+                detail="Model not found",
+            )
+    if ":" not in payload["model"]:
+        payload["model"] = f"{payload['model']}:latest"
+    url = await get_ollama_url(url_idx, payload["model"])
+    log.info(f"url: {url}")
+    log.debug(f"generate_chat_completion() - 2.payload = {payload}")
+    parsed_url = urlparse(url)
+    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+    api_config = app.state.config.OLLAMA_API_CONFIGS.get(base_url, {})
+    prefix_id = api_config.get("prefix_id", None)
+    if prefix_id:
+        payload["model"] = payload["model"].replace(f"{prefix_id}.", "")
+    return await post_streaming_url(
+        f"{url}/api/chat",
+        json.dumps(payload),
+        stream=form_data.stream,
+        content_type="application/x-ndjson",
+    )
+# TODO: we should update this part once Ollama supports other types
+class OpenAIChatMessageContent(BaseModel):
+    type: str
+    model_config = ConfigDict(extra="allow")
+class OpenAIChatMessage(BaseModel):
+    role: str
+    content: Union[str, list[OpenAIChatMessageContent]]
+    model_config = ConfigDict(extra="allow")
+class OpenAIChatCompletionForm(BaseModel):
+    model: str
+    messages: list[OpenAIChatMessage]
+    model_config = ConfigDict(extra="allow")
+@app.post("/v1/chat/completions")
+@app.post("/v1/chat/completions/{url_idx}")
+async def generate_openai_chat_completion(
+    form_data: dict,
+    url_idx: Optional[int] = None,
+    user=Depends(get_verified_user),
+):
+    try:
+        completion_form = OpenAIChatCompletionForm(**form_data)
+    except Exception as e:
+        log.exception(e)
+        raise HTTPException(
+            status_code=400,
+            detail=str(e),
+        )
+    payload = {**completion_form.model_dump(exclude_none=True, exclude=["metadata"])}
+    if "metadata" in payload:
+        del payload["metadata"]
+    model_id = completion_form.model
+    if ":" not in model_id:
+        model_id = f"{model_id}:latest"
+    model_info = Models.get_model_by_id(model_id)
+    if model_info:
+        if model_info.base_model_id:
+            payload["model"] = model_info.base_model_id
+        params = model_info.params.model_dump()
+        if params:
+            payload = apply_model_params_to_body_openai(params, payload)
+            payload = apply_model_system_prompt_to_body(params, payload, user)
+        # Check if user has access to the model
+        if user.role == "user":
+            if not (
+                user.id == model_info.user_id
+                or has_access(
+                    user.id, type="read", access_control=model_info.access_control
+                )
+            ):
+                raise HTTPException(
+                    status_code=403,
+                    detail="Model not found",
+                )
+    else:
+        if user.role != "admin":
+            raise HTTPException(
+                status_code=403,
+                detail="Model not found",
+            )
+    if ":" not in payload["model"]:
+        payload["model"] = f"{payload['model']}:latest"
+    url = await get_ollama_url(url_idx, payload["model"])
+    log.info(f"url: {url}")
+    api_config = app.state.config.OLLAMA_API_CONFIGS.get(url, {})
+    prefix_id = api_config.get("prefix_id", None)
+    if prefix_id:
+        payload["model"] = payload["model"].replace(f"{prefix_id}.", "")
+    return await post_streaming_url(
+        f"{url}/v1/chat/completions",
+        json.dumps(payload),
+        stream=payload.get("stream", False),
+    )
+@app.get("/v1/models")
+@app.get("/v1/models/{url_idx}")
+async def get_openai_models(
+    url_idx: Optional[int] = None,
+    user=Depends(get_verified_user),
+):
+    models = []
+    if url_idx is None:
+        model_list = await get_all_models()
+        models = [
+            {
+                "id": model["model"],
+                "object": "model",
+                "created": int(time.time()),
+                "owned_by": "openai",
+            }
+            for model in model_list["models"]
+        ]
+    else:
+        url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+        try:
+            r = requests.request(method="GET", url=f"{url}/api/tags")
+            r.raise_for_status()
+            model_list = r.json()
+            models = [
+                {
+                    "id": model["model"],
+                    "object": "model",
+                    "created": int(time.time()),
+                    "owned_by": "openai",
+                }
+                for model in models["models"]
+            ]
+        except Exception as e:
+            log.exception(e)
+            error_detail = "Open WebUI: Server Connection Error"
+            if r is not None:
+                try:
+                    res = r.json()
+                    if "error" in res:
+                        error_detail = f"Ollama: {res['error']}"
+                except Exception:
+                    error_detail = f"Ollama: {e}"
+            raise HTTPException(
+                status_code=r.status_code if r else 500,
+                detail=error_detail,
+            )
+    if user.role == "user":
+        # Filter models based on user access control
+        filtered_models = []
+        for model in models:
+            model_info = Models.get_model_by_id(model["id"])
+            if model_info:
+                if user.id == model_info.user_id or has_access(
+                    user.id, type="read", access_control=model_info.access_control
+                ):
+                    filtered_models.append(model)
+        models = filtered_models
+    return {
+        "data": models,
+        "object": "list",
+    }
+class UrlForm(BaseModel):
+    url: str
+class UploadBlobForm(BaseModel):
+    filename: str
+def parse_huggingface_url(hf_url):
+    try:
+        # Parse the URL
+        parsed_url = urlparse(hf_url)
+        # Get the path and split it into components
+        path_components = parsed_url.path.split("/")
+        # Extract the desired output
+        model_file = path_components[-1]
+        return model_file
+    except ValueError:
+        return None
+async def download_file_stream(
+    ollama_url, file_url, file_path, file_name, chunk_size=1024 * 1024
+):
+    done = False
+    if os.path.exists(file_path):
+        current_size = os.path.getsize(file_path)
+    else:
+        current_size = 0
+    headers = {"Range": f"bytes={current_size}-"} if current_size > 0 else {}
+    timeout = aiohttp.ClientTimeout(total=600)  # Set the timeout
+    async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
+        async with session.get(file_url, headers=headers) as response:
+            total_size = int(response.headers.get("content-length", 0)) + current_size
+            with open(file_path, "ab+") as file:
+                async for data in response.content.iter_chunked(chunk_size):
+                    current_size += len(data)
+                    file.write(data)
+                    done = current_size == total_size
+                    progress = round((current_size / total_size) * 100, 2)
+                    yield f'data: {{"progress": {progress}, "completed": {current_size}, "total": {total_size}}}\n\n'
+                if done:
+                    file.seek(0)
+                    hashed = calculate_sha256(file)
+                    file.seek(0)
+                    url = f"{ollama_url}/api/blobs/sha256:{hashed}"
+                    response = requests.post(url, data=file)
+                    if response.ok:
+                        res = {
+                            "done": done,
+                            "blob": f"sha256:{hashed}",
+                            "name": file_name,
+                        }
+                        os.remove(file_path)
+                        yield f"data: {json.dumps(res)}\n\n"
+                    else:
+                        raise "Ollama: Could not create blob, Please try again."
+# url = "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF/resolve/main/stablelm-zephyr-3b.Q2_K.gguf"
+@app.post("/models/download")
+@app.post("/models/download/{url_idx}")
+async def download_model(
+    form_data: UrlForm,
+    url_idx: Optional[int] = None,
+    user=Depends(get_admin_user),
+):
+    allowed_hosts = ["https://huggingface.co/", "https://github.com/"]
+    if not any(form_data.url.startswith(host) for host in allowed_hosts):
+        raise HTTPException(
+            status_code=400,
+            detail="Invalid file_url. Only URLs from allowed hosts are permitted.",
+        )
+    if url_idx is None:
+        url_idx = 0
+    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+    file_name = parse_huggingface_url(form_data.url)
+    if file_name:
+        file_path = f"{UPLOAD_DIR}/{file_name}"
+        return StreamingResponse(
+            download_file_stream(url, form_data.url, file_path, file_name),
+        )
+    else:
+        return None
+@app.post("/models/upload")
+@app.post("/models/upload/{url_idx}")
+def upload_model(
+    file: UploadFile = File(...),
+    url_idx: Optional[int] = None,
+    user=Depends(get_admin_user),
+):
+    if url_idx is None:
+        url_idx = 0
+    ollama_url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+    file_path = f"{UPLOAD_DIR}/{file.filename}"
+    # Save file in chunks
+    with open(file_path, "wb+") as f:
+        for chunk in file.file:
+            f.write(chunk)
+    def file_process_stream():
+        nonlocal ollama_url
+        total_size = os.path.getsize(file_path)
+        chunk_size = 1024 * 1024
+        try:
+            with open(file_path, "rb") as f:
+                total = 0
+                done = False
+                while not done:
+                    chunk = f.read(chunk_size)
+                    if not chunk:
+                        done = True
+                        continue
+                    total += len(chunk)
+                    progress = round((total / total_size) * 100, 2)
+                    res = {
+                        "progress": progress,
+                        "total": total_size,
+                        "completed": total,
+                    }
+                    yield f"data: {json.dumps(res)}\n\n"
+                if done:
+                    f.seek(0)
+                    hashed = calculate_sha256(f)
+                    f.seek(0)
+                    url = f"{ollama_url}/api/blobs/sha256:{hashed}"
+                    response = requests.post(url, data=f)
+                    if response.ok:
+                        res = {
+                            "done": done,
+                            "blob": f"sha256:{hashed}",
+                            "name": file.filename,
+                        }
+                        os.remove(file_path)
+                        yield f"data: {json.dumps(res)}\n\n"
+                    else:
+                        raise Exception(
+                            "Ollama: Could not create blob, Please try again."
+                        )
+        except Exception as e:
+            res = {"error": str(e)}
+            yield f"data: {json.dumps(res)}\n\n"
+    return StreamingResponse(file_process_stream(), media_type="text/event-stream")

backend/open_webui/apps/openai/main.py ADDED Viewed

	@@ -0,0 +1,718 @@

+import asyncio
+import hashlib
+import json
+import logging
+from pathlib import Path
+from typing import Literal, Optional, overload
+import aiohttp
+from aiocache import cached
+import requests
+from open_webui.apps.webui.models.models import Models
+from open_webui.config import (
+    CACHE_DIR,
+    CORS_ALLOW_ORIGIN,
+    ENABLE_OPENAI_API,
+    OPENAI_API_BASE_URLS,
+    OPENAI_API_KEYS,
+    OPENAI_API_CONFIGS,
+    AppConfig,
+)
+from open_webui.env import (
+    AIOHTTP_CLIENT_TIMEOUT,
+    AIOHTTP_CLIENT_TIMEOUT_OPENAI_MODEL_LIST,
+    ENABLE_FORWARD_USER_INFO_HEADERS,
+)
+from open_webui.constants import ERROR_MESSAGES
+from open_webui.env import ENV, SRC_LOG_LEVELS
+from fastapi import Depends, FastAPI, HTTPException, Request
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse, StreamingResponse
+from pydantic import BaseModel
+from starlette.background import BackgroundTask
+from open_webui.utils.payload import (
+    apply_model_params_to_body_openai,
+    apply_model_system_prompt_to_body,
+)
+from open_webui.utils.utils import get_admin_user, get_verified_user
+from open_webui.utils.access_control import has_access
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["OPENAI"])
+app = FastAPI(
+    docs_url="/docs" if ENV == "dev" else None,
+    openapi_url="/openapi.json" if ENV == "dev" else None,
+    redoc_url=None,
+)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=CORS_ALLOW_ORIGIN,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+app.state.config = AppConfig()
+app.state.config.ENABLE_OPENAI_API = ENABLE_OPENAI_API
+app.state.config.OPENAI_API_BASE_URLS = OPENAI_API_BASE_URLS
+app.state.config.OPENAI_API_KEYS = OPENAI_API_KEYS
+app.state.config.OPENAI_API_CONFIGS = OPENAI_API_CONFIGS
+@app.get("/config")
+async def get_config(user=Depends(get_admin_user)):
+    return {
+        "ENABLE_OPENAI_API": app.state.config.ENABLE_OPENAI_API,
+        "OPENAI_API_BASE_URLS": app.state.config.OPENAI_API_BASE_URLS,
+        "OPENAI_API_KEYS": app.state.config.OPENAI_API_KEYS,
+        "OPENAI_API_CONFIGS": app.state.config.OPENAI_API_CONFIGS,
+    }
+class OpenAIConfigForm(BaseModel):
+    ENABLE_OPENAI_API: Optional[bool] = None
+    OPENAI_API_BASE_URLS: list[str]
+    OPENAI_API_KEYS: list[str]
+    OPENAI_API_CONFIGS: dict
+@app.post("/config/update")
+async def update_config(form_data: OpenAIConfigForm, user=Depends(get_admin_user)):
+    app.state.config.ENABLE_OPENAI_API = form_data.ENABLE_OPENAI_API
+    app.state.config.OPENAI_API_BASE_URLS = form_data.OPENAI_API_BASE_URLS
+    app.state.config.OPENAI_API_KEYS = form_data.OPENAI_API_KEYS
+    # Check if API KEYS length is same than API URLS length
+    if len(app.state.config.OPENAI_API_KEYS) != len(
+        app.state.config.OPENAI_API_BASE_URLS
+    ):
+        if len(app.state.config.OPENAI_API_KEYS) > len(
+            app.state.config.OPENAI_API_BASE_URLS
+        ):
+            app.state.config.OPENAI_API_KEYS = app.state.config.OPENAI_API_KEYS[
+                : len(app.state.config.OPENAI_API_BASE_URLS)
+            ]
+        else:
+            app.state.config.OPENAI_API_KEYS += [""] * (
+                len(app.state.config.OPENAI_API_BASE_URLS)
+                - len(app.state.config.OPENAI_API_KEYS)
+            )
+    app.state.config.OPENAI_API_CONFIGS = form_data.OPENAI_API_CONFIGS
+    # Remove any extra configs
+    config_urls = app.state.config.OPENAI_API_CONFIGS.keys()
+    for idx, url in enumerate(app.state.config.OPENAI_API_BASE_URLS):
+        if url not in config_urls:
+            app.state.config.OPENAI_API_CONFIGS.pop(url, None)
+    return {
+        "ENABLE_OPENAI_API": app.state.config.ENABLE_OPENAI_API,
+        "OPENAI_API_BASE_URLS": app.state.config.OPENAI_API_BASE_URLS,
+        "OPENAI_API_KEYS": app.state.config.OPENAI_API_KEYS,
+        "OPENAI_API_CONFIGS": app.state.config.OPENAI_API_CONFIGS,
+    }
+@app.post("/audio/speech")
+async def speech(request: Request, user=Depends(get_verified_user)):
+    idx = None
+    try:
+        idx = app.state.config.OPENAI_API_BASE_URLS.index("https://api.openai.com/v1")
+        body = await request.body()
+        name = hashlib.sha256(body).hexdigest()
+        SPEECH_CACHE_DIR = Path(CACHE_DIR).joinpath("./audio/speech/")
+        SPEECH_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+        file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3")
+        file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json")
+        # Check if the file already exists in the cache
+        if file_path.is_file():
+            return FileResponse(file_path)
+        headers = {}
+        headers["Authorization"] = f"Bearer {app.state.config.OPENAI_API_KEYS[idx]}"
+        headers["Content-Type"] = "application/json"
+        if "openrouter.ai" in app.state.config.OPENAI_API_BASE_URLS[idx]:
+            headers["HTTP-Referer"] = "https://openwebui.com/"
+            headers["X-Title"] = "Open WebUI"
+        if ENABLE_FORWARD_USER_INFO_HEADERS:
+            headers["X-OpenWebUI-User-Name"] = user.name
+            headers["X-OpenWebUI-User-Id"] = user.id
+            headers["X-OpenWebUI-User-Email"] = user.email
+            headers["X-OpenWebUI-User-Role"] = user.role
+        r = None
+        try:
+            r = requests.post(
+                url=f"{app.state.config.OPENAI_API_BASE_URLS[idx]}/audio/speech",
+                data=body,
+                headers=headers,
+                stream=True,
+            )
+            r.raise_for_status()
+            # Save the streaming content to a file
+            with open(file_path, "wb") as f:
+                for chunk in r.iter_content(chunk_size=8192):
+                    f.write(chunk)
+            with open(file_body_path, "w") as f:
+                json.dump(json.loads(body.decode("utf-8")), f)
+            # Return the saved file
+            return FileResponse(file_path)
+        except Exception as e:
+            log.exception(e)
+            error_detail = "Open WebUI: Server Connection Error"
+            if r is not None:
+                try:
+                    res = r.json()
+                    if "error" in res:
+                        error_detail = f"External: {res['error']}"
+                except Exception:
+                    error_detail = f"External: {e}"
+            raise HTTPException(
+                status_code=r.status_code if r else 500, detail=error_detail
+            )
+    except ValueError:
+        raise HTTPException(status_code=401, detail=ERROR_MESSAGES.OPENAI_NOT_FOUND)
+async def aiohttp_get(url, key=None):
+    timeout = aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT_OPENAI_MODEL_LIST)
+    try:
+        headers = {"Authorization": f"Bearer {key}"} if key else {}
+        async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
+            async with session.get(url, headers=headers) as response:
+                return await response.json()
+    except Exception as e:
+        # Handle connection error here
+        log.error(f"Connection error: {e}")
+        return None
+async def cleanup_response(
+    response: Optional[aiohttp.ClientResponse],
+    session: Optional[aiohttp.ClientSession],
+):
+    if response:
+        response.close()
+    if session:
+        await session.close()
+def merge_models_lists(model_lists):
+    log.debug(f"merge_models_lists {model_lists}")
+    merged_list = []
+    for idx, models in enumerate(model_lists):
+        if models is not None and "error" not in models:
+            merged_list.extend(
+                [
+                    {
+                        **model,
+                        "name": model.get("name", model["id"]),
+                        "owned_by": "openai",
+                        "openai": model,
+                        "urlIdx": idx,
+                    }
+                    for model in models
+                    if "api.openai.com"
+                    not in app.state.config.OPENAI_API_BASE_URLS[idx]
+                    or not any(
+                        name in model["id"]
+                        for name in [
+                            "babbage",
+                            "dall-e",
+                            "davinci",
+                            "embedding",
+                            "tts",
+                            "whisper",
+                        ]
+                    )
+                ]
+            )
+    return merged_list
+async def get_all_models_responses() -> list:
+    if not app.state.config.ENABLE_OPENAI_API:
+        return []
+    # Check if API KEYS length is same than API URLS length
+    num_urls = len(app.state.config.OPENAI_API_BASE_URLS)
+    num_keys = len(app.state.config.OPENAI_API_KEYS)
+    if num_keys != num_urls:
+        # if there are more keys than urls, remove the extra keys
+        if num_keys > num_urls:
+            new_keys = app.state.config.OPENAI_API_KEYS[:num_urls]
+            app.state.config.OPENAI_API_KEYS = new_keys
+        # if there are more urls than keys, add empty keys
+        else:
+            app.state.config.OPENAI_API_KEYS += [""] * (num_urls - num_keys)
+    tasks = []
+    for idx, url in enumerate(app.state.config.OPENAI_API_BASE_URLS):
+        if url not in app.state.config.OPENAI_API_CONFIGS:
+            tasks.append(
+                aiohttp_get(f"{url}/models", app.state.config.OPENAI_API_KEYS[idx])
+            )
+        else:
+            api_config = app.state.config.OPENAI_API_CONFIGS.get(url, {})
+            enable = api_config.get("enable", True)
+            model_ids = api_config.get("model_ids", [])
+            if enable:
+                if len(model_ids) == 0:
+                    tasks.append(
+                        aiohttp_get(
+                            f"{url}/models", app.state.config.OPENAI_API_KEYS[idx]
+                        )
+                    )
+                else:
+                    model_list = {
+                        "object": "list",
+                        "data": [
+                            {
+                                "id": model_id,
+                                "name": model_id,
+                                "owned_by": "openai",
+                                "openai": {"id": model_id},
+                                "urlIdx": idx,
+                            }
+                            for model_id in model_ids
+                        ],
+                    }
+                    tasks.append(asyncio.ensure_future(asyncio.sleep(0, model_list)))
+            else:
+                tasks.append(asyncio.ensure_future(asyncio.sleep(0, None)))
+    responses = await asyncio.gather(*tasks)
+    for idx, response in enumerate(responses):
+        if response:
+            url = app.state.config.OPENAI_API_BASE_URLS[idx]
+            api_config = app.state.config.OPENAI_API_CONFIGS.get(url, {})
+            prefix_id = api_config.get("prefix_id", None)
+            if prefix_id:
+                for model in (
+                    response if isinstance(response, list) else response.get("data", [])
+                ):
+                    model["id"] = f"{prefix_id}.{model['id']}"
+    log.debug(f"get_all_models:responses() {responses}")
+    return responses
+@cached(ttl=3)
+async def get_all_models() -> dict[str, list]:
+    log.info("get_all_models()")
+    if not app.state.config.ENABLE_OPENAI_API:
+        return {"data": []}
+    responses = await get_all_models_responses()
+    def extract_data(response):
+        if response and "data" in response:
+            return response["data"]
+        if isinstance(response, list):
+            return response
+        return None
+    models = {"data": merge_models_lists(map(extract_data, responses))}
+    log.debug(f"models: {models}")
+    return models
+@app.get("/models")
+@app.get("/models/{url_idx}")
+async def get_models(url_idx: Optional[int] = None, user=Depends(get_verified_user)):
+    models = {
+        "data": [],
+    }
+    if url_idx is None:
+        models = await get_all_models()
+    else:
+        url = app.state.config.OPENAI_API_BASE_URLS[url_idx]
+        key = app.state.config.OPENAI_API_KEYS[url_idx]
+        headers = {}
+        headers["Authorization"] = f"Bearer {key}"
+        headers["Content-Type"] = "application/json"
+        if ENABLE_FORWARD_USER_INFO_HEADERS:
+            headers["X-OpenWebUI-User-Name"] = user.name
+            headers["X-OpenWebUI-User-Id"] = user.id
+            headers["X-OpenWebUI-User-Email"] = user.email
+            headers["X-OpenWebUI-User-Role"] = user.role
+        r = None
+        timeout = aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT_OPENAI_MODEL_LIST)
+        async with aiohttp.ClientSession(timeout=timeout) as session:
+            try:
+                async with session.get(f"{url}/models", headers=headers) as r:
+                    if r.status != 200:
+                        # Extract response error details if available
+                        error_detail = f"HTTP Error: {r.status}"
+                        res = await r.json()
+                        if "error" in res:
+                            error_detail = f"External Error: {res['error']}"
+                        raise Exception(error_detail)
+                    response_data = await r.json()
+                    # Check if we're calling OpenAI API based on the URL
+                    if "api.openai.com" in url:
+                        # Filter models according to the specified conditions
+                        response_data["data"] = [
+                            model
+                            for model in response_data.get("data", [])
+                            if not any(
+                                name in model["id"]
+                                for name in [
+                                    "babbage",
+                                    "dall-e",
+                                    "davinci",
+                                    "embedding",
+                                    "tts",
+                                    "whisper",
+                                ]
+                            )
+                        ]
+                    models = response_data
+            except aiohttp.ClientError as e:
+                # ClientError covers all aiohttp requests issues
+                log.exception(f"Client error: {str(e)}")
+                # Handle aiohttp-specific connection issues, timeout etc.
+                raise HTTPException(
+                    status_code=500, detail="Open WebUI: Server Connection Error"
+                )
+            except Exception as e:
+                log.exception(f"Unexpected error: {e}")
+                # Generic error handler in case parsing JSON or other steps fail
+                error_detail = f"Unexpected error: {str(e)}"
+                raise HTTPException(status_code=500, detail=error_detail)
+    if user.role == "user":
+        # Filter models based on user access control
+        filtered_models = []
+        for model in models.get("data", []):
+            model_info = Models.get_model_by_id(model["id"])
+            if model_info:
+                if user.id == model_info.user_id or has_access(
+                    user.id, type="read", access_control=model_info.access_control
+                ):
+                    filtered_models.append(model)
+        models["data"] = filtered_models
+    return models
+class ConnectionVerificationForm(BaseModel):
+    url: str
+    key: str
+@app.post("/verify")
+async def verify_connection(
+    form_data: ConnectionVerificationForm, user=Depends(get_admin_user)
+):
+    url = form_data.url
+    key = form_data.key
+    headers = {}
+    headers["Authorization"] = f"Bearer {key}"
+    headers["Content-Type"] = "application/json"
+    timeout = aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT_OPENAI_MODEL_LIST)
+    async with aiohttp.ClientSession(timeout=timeout) as session:
+        try:
+            async with session.get(f"{url}/models", headers=headers) as r:
+                if r.status != 200:
+                    # Extract response error details if available
+                    error_detail = f"HTTP Error: {r.status}"
+                    res = await r.json()
+                    if "error" in res:
+                        error_detail = f"External Error: {res['error']}"
+                    raise Exception(error_detail)
+                response_data = await r.json()
+                return response_data
+        except aiohttp.ClientError as e:
+            # ClientError covers all aiohttp requests issues
+            log.exception(f"Client error: {str(e)}")
+            # Handle aiohttp-specific connection issues, timeout etc.
+            raise HTTPException(
+                status_code=500, detail="Open WebUI: Server Connection Error"
+            )
+        except Exception as e:
+            log.exception(f"Unexpected error: {e}")
+            # Generic error handler in case parsing JSON or other steps fail
+            error_detail = f"Unexpected error: {str(e)}"
+            raise HTTPException(status_code=500, detail=error_detail)
+@app.post("/chat/completions")
+async def generate_chat_completion(
+    form_data: dict,
+    user=Depends(get_verified_user),
+    bypass_filter: Optional[bool] = False,
+):
+    idx = 0
+    payload = {**form_data}
+    if "metadata" in payload:
+        del payload["metadata"]
+    model_id = form_data.get("model")
+    model_info = Models.get_model_by_id(model_id)
+    # Check model info and override the payload
+    if model_info:
+        if model_info.base_model_id:
+            payload["model"] = model_info.base_model_id
+        params = model_info.params.model_dump()
+        payload = apply_model_params_to_body_openai(params, payload)
+        payload = apply_model_system_prompt_to_body(params, payload, user)
+        # Check if user has access to the model
+        if not bypass_filter and user.role == "user":
+            if not (
+                user.id == model_info.user_id
+                or has_access(
+                    user.id, type="read", access_control=model_info.access_control
+                )
+            ):
+                raise HTTPException(
+                    status_code=403,
+                    detail="Model not found",
+                )
+    elif not bypass_filter:
+        if user.role != "admin":
+            raise HTTPException(
+                status_code=403,
+                detail="Model not found",
+            )
+    # Attemp to get urlIdx from the model
+    models = await get_all_models()
+    # Find the model from the list
+    model = next(
+        (model for model in models["data"] if model["id"] == payload.get("model")),
+        None,
+    )
+    if model:
+        idx = model["urlIdx"]
+    else:
+        raise HTTPException(
+            status_code=404,
+            detail="Model not found",
+        )
+    # Get the API config for the model
+    api_config = app.state.config.OPENAI_API_CONFIGS.get(
+        app.state.config.OPENAI_API_BASE_URLS[idx], {}
+    )
+    prefix_id = api_config.get("prefix_id", None)
+    if prefix_id:
+        payload["model"] = payload["model"].replace(f"{prefix_id}.", "")
+    # Add user info to the payload if the model is a pipeline
+    if "pipeline" in model and model.get("pipeline"):
+        payload["user"] = {
+            "name": user.name,
+            "id": user.id,
+            "email": user.email,
+            "role": user.role,
+        }
+    url = app.state.config.OPENAI_API_BASE_URLS[idx]
+    key = app.state.config.OPENAI_API_KEYS[idx]
+    # Fix: O1 does not support the "max_tokens" parameter, Modify "max_tokens" to "max_completion_tokens"
+    is_o1 = payload["model"].lower().startswith("o1-")
+    # Change max_completion_tokens to max_tokens (Backward compatible)
+    if "api.openai.com" not in url and not is_o1:
+        if "max_completion_tokens" in payload:
+            # Remove "max_completion_tokens" from the payload
+            payload["max_tokens"] = payload["max_completion_tokens"]
+            del payload["max_completion_tokens"]
+    else:
+        if is_o1 and "max_tokens" in payload:
+            payload["max_completion_tokens"] = payload["max_tokens"]
+            del payload["max_tokens"]
+        if "max_tokens" in payload and "max_completion_tokens" in payload:
+            del payload["max_tokens"]
+    # Fix: O1 does not support the "system" parameter, Modify "system" to "user"
+    if is_o1 and payload["messages"][0]["role"] == "system":
+        payload["messages"][0]["role"] = "user"
+    # Convert the modified body back to JSON
+    payload = json.dumps(payload)
+    headers = {}
+    headers["Authorization"] = f"Bearer {key}"
+    headers["Content-Type"] = "application/json"
+    if "openrouter.ai" in app.state.config.OPENAI_API_BASE_URLS[idx]:
+        headers["HTTP-Referer"] = "https://openwebui.com/"
+        headers["X-Title"] = "Open WebUI"
+    if ENABLE_FORWARD_USER_INFO_HEADERS:
+        headers["X-OpenWebUI-User-Name"] = user.name
+        headers["X-OpenWebUI-User-Id"] = user.id
+        headers["X-OpenWebUI-User-Email"] = user.email
+        headers["X-OpenWebUI-User-Role"] = user.role
+    r = None
+    session = None
+    streaming = False
+    response = None
+    try:
+        session = aiohttp.ClientSession(
+            trust_env=True, timeout=aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT)
+        )
+        r = await session.request(
+            method="POST",
+            url=f"{url}/chat/completions",
+            data=payload,
+            headers=headers,
+        )
+        # Check if response is SSE
+        if "text/event-stream" in r.headers.get("Content-Type", ""):
+            streaming = True
+            return StreamingResponse(
+                r.content,
+                status_code=r.status,
+                headers=dict(r.headers),
+                background=BackgroundTask(
+                    cleanup_response, response=r, session=session
+                ),
+            )
+        else:
+            try:
+                response = await r.json()
+            except Exception as e:
+                log.error(e)
+                response = await r.text()
+            r.raise_for_status()
+            return response
+    except Exception as e:
+        log.exception(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if isinstance(response, dict):
+            if "error" in response:
+                error_detail = f"{response['error']['message'] if 'message' in response['error'] else response['error']}"
+        elif isinstance(response, str):
+            error_detail = response
+        raise HTTPException(status_code=r.status if r else 500, detail=error_detail)
+    finally:
+        if not streaming and session:
+            if r:
+                r.close()
+            await session.close()
+@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
+async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
+    idx = 0
+    body = await request.body()
+    url = app.state.config.OPENAI_API_BASE_URLS[idx]
+    key = app.state.config.OPENAI_API_KEYS[idx]
+    target_url = f"{url}/{path}"
+    headers = {}
+    headers["Authorization"] = f"Bearer {key}"
+    headers["Content-Type"] = "application/json"
+    if ENABLE_FORWARD_USER_INFO_HEADERS:
+        headers["X-OpenWebUI-User-Name"] = user.name
+        headers["X-OpenWebUI-User-Id"] = user.id
+        headers["X-OpenWebUI-User-Email"] = user.email
+        headers["X-OpenWebUI-User-Role"] = user.role
+    r = None
+    session = None
+    streaming = False
+    try:
+        session = aiohttp.ClientSession(trust_env=True)
+        r = await session.request(
+            method=request.method,
+            url=target_url,
+            data=body,
+            headers=headers,
+        )
+        r.raise_for_status()
+        # Check if response is SSE
+        if "text/event-stream" in r.headers.get("Content-Type", ""):
+            streaming = True
+            return StreamingResponse(
+                r.content,
+                status_code=r.status,
+                headers=dict(r.headers),
+                background=BackgroundTask(
+                    cleanup_response, response=r, session=session
+                ),
+            )
+        else:
+            response_data = await r.json()
+            return response_data
+    except Exception as e:
+        log.exception(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = await r.json()
+                print(res)
+                if "error" in res:
+                    error_detail = f"External: {res['error']['message'] if 'message' in res['error'] else res['error']}"
+            except Exception:
+                error_detail = f"External: {e}"
+        raise HTTPException(status_code=r.status if r else 500, detail=error_detail)
+    finally:
+        if not streaming and session:
+            if r:
+                r.close()
+            await session.close()

backend/open_webui/apps/retrieval/loaders/main.py ADDED Viewed

	@@ -0,0 +1,190 @@

+import requests
+import logging
+import ftfy
+from langchain_community.document_loaders import (
+    BSHTMLLoader,
+    CSVLoader,
+    Docx2txtLoader,
+    OutlookMessageLoader,
+    PyPDFLoader,
+    TextLoader,
+    UnstructuredEPubLoader,
+    UnstructuredExcelLoader,
+    UnstructuredMarkdownLoader,
+    UnstructuredPowerPointLoader,
+    UnstructuredRSTLoader,
+    UnstructuredXMLLoader,
+    YoutubeLoader,
+)
+from langchain_core.documents import Document
+from open_webui.env import SRC_LOG_LEVELS
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["RAG"])
+known_source_ext = [
+    "go",
+    "py",
+    "java",
+    "sh",
+    "bat",
+    "ps1",
+    "cmd",
+    "js",
+    "ts",
+    "css",
+    "cpp",
+    "hpp",
+    "h",
+    "c",
+    "cs",
+    "sql",
+    "log",
+    "ini",
+    "pl",
+    "pm",
+    "r",
+    "dart",
+    "dockerfile",
+    "env",
+    "php",
+    "hs",
+    "hsc",
+    "lua",
+    "nginxconf",
+    "conf",
+    "m",
+    "mm",
+    "plsql",
+    "perl",
+    "rb",
+    "rs",
+    "db2",
+    "scala",
+    "bash",
+    "swift",
+    "vue",
+    "svelte",
+    "msg",
+    "ex",
+    "exs",
+    "erl",
+    "tsx",
+    "jsx",
+    "hs",
+    "lhs",
+]
+class TikaLoader:
+    def __init__(self, url, file_path, mime_type=None):
+        self.url = url
+        self.file_path = file_path
+        self.mime_type = mime_type
+    def load(self) -> list[Document]:
+        with open(self.file_path, "rb") as f:
+            data = f.read()
+        if self.mime_type is not None:
+            headers = {"Content-Type": self.mime_type}
+        else:
+            headers = {}
+        endpoint = self.url
+        if not endpoint.endswith("/"):
+            endpoint += "/"
+        endpoint += "tika/text"
+        r = requests.put(endpoint, data=data, headers=headers)
+        if r.ok:
+            raw_metadata = r.json()
+            text = raw_metadata.get("X-TIKA:content", "<No text content found>")
+            if "Content-Type" in raw_metadata:
+                headers["Content-Type"] = raw_metadata["Content-Type"]
+            log.info("Tika extracted text: %s", text)
+            return [Document(page_content=text, metadata=headers)]
+        else:
+            raise Exception(f"Error calling Tika: {r.reason}")
+class Loader:
+    def __init__(self, engine: str = "", **kwargs):
+        self.engine = engine
+        self.kwargs = kwargs
+    def load(
+        self, filename: str, file_content_type: str, file_path: str
+    ) -> list[Document]:
+        loader = self._get_loader(filename, file_content_type, file_path)
+        docs = loader.load()
+        return [
+            Document(
+                page_content=ftfy.fix_text(doc.page_content), metadata=doc.metadata
+            )
+            for doc in docs
+        ]
+    def _get_loader(self, filename: str, file_content_type: str, file_path: str):
+        file_ext = filename.split(".")[-1].lower()
+        if self.engine == "tika" and self.kwargs.get("TIKA_SERVER_URL"):
+            if file_ext in known_source_ext or (
+                file_content_type and file_content_type.find("text/") >= 0
+            ):
+                loader = TextLoader(file_path, autodetect_encoding=True)
+            else:
+                loader = TikaLoader(
+                    url=self.kwargs.get("TIKA_SERVER_URL"),
+                    file_path=file_path,
+                    mime_type=file_content_type,
+                )
+        else:
+            if file_ext == "pdf":
+                loader = PyPDFLoader(
+                    file_path, extract_images=self.kwargs.get("PDF_EXTRACT_IMAGES")
+                )
+            elif file_ext == "csv":
+                loader = CSVLoader(file_path)
+            elif file_ext == "rst":
+                loader = UnstructuredRSTLoader(file_path, mode="elements")
+            elif file_ext == "xml":
+                loader = UnstructuredXMLLoader(file_path)
+            elif file_ext in ["htm", "html"]:
+                loader = BSHTMLLoader(file_path, open_encoding="unicode_escape")
+            elif file_ext == "md":
+                loader = TextLoader(file_path, autodetect_encoding=True)
+            elif file_content_type == "application/epub+zip":
+                loader = UnstructuredEPubLoader(file_path)
+            elif (
+                file_content_type
+                == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+                or file_ext == "docx"
+            ):
+                loader = Docx2txtLoader(file_path)
+            elif file_content_type in [
+                "application/vnd.ms-excel",
+                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            ] or file_ext in ["xls", "xlsx"]:
+                loader = UnstructuredExcelLoader(file_path)
+            elif file_content_type in [
+                "application/vnd.ms-powerpoint",
+                "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+            ] or file_ext in ["ppt", "pptx"]:
+                loader = UnstructuredPowerPointLoader(file_path)
+            elif file_ext == "msg":
+                loader = OutlookMessageLoader(file_path)
+            elif file_ext in known_source_ext or (
+                file_content_type and file_content_type.find("text/") >= 0
+            ):
+                loader = TextLoader(file_path, autodetect_encoding=True)
+            else:
+                loader = TextLoader(file_path, autodetect_encoding=True)
+        return loader

backend/open_webui/apps/retrieval/loaders/youtube.py ADDED Viewed

	@@ -0,0 +1,117 @@

+import logging
+from typing import Any, Dict, Generator, List, Optional, Sequence, Union
+from urllib.parse import parse_qs, urlparse
+from langchain_core.documents import Document
+from open_webui.env import SRC_LOG_LEVELS
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["RAG"])
+ALLOWED_SCHEMES = {"http", "https"}
+ALLOWED_NETLOCS = {
+    "youtu.be",
+    "m.youtube.com",
+    "youtube.com",
+    "www.youtube.com",
+    "www.youtube-nocookie.com",
+    "vid.plus",
+}
+def _parse_video_id(url: str) -> Optional[str]:
+    """Parse a YouTube URL and return the video ID if valid, otherwise None."""
+    parsed_url = urlparse(url)
+    if parsed_url.scheme not in ALLOWED_SCHEMES:
+        return None
+    if parsed_url.netloc not in ALLOWED_NETLOCS:
+        return None
+    path = parsed_url.path
+    if path.endswith("/watch"):
+        query = parsed_url.query
+        parsed_query = parse_qs(query)
+        if "v" in parsed_query:
+            ids = parsed_query["v"]
+            video_id = ids if isinstance(ids, str) else ids[0]
+        else:
+            return None
+    else:
+        path = parsed_url.path.lstrip("/")
+        video_id = path.split("/")[-1]
+    if len(video_id) != 11:  # Video IDs are 11 characters long
+        return None
+    return video_id
+class YoutubeLoader:
+    """Load `YouTube` video transcripts."""
+    def __init__(
+        self,
+        video_id: str,
+        language: Union[str, Sequence[str]] = "en",
+        proxy_url: Optional[str] = None,
+    ):
+        """Initialize with YouTube video ID."""
+        _video_id = _parse_video_id(video_id)
+        self.video_id = _video_id if _video_id is not None else video_id
+        self._metadata = {"source": video_id}
+        self.language = language
+        self.proxy_url = proxy_url
+        if isinstance(language, str):
+            self.language = [language]
+        else:
+            self.language = language
+    def load(self) -> List[Document]:
+        """Load YouTube transcripts into `Document` objects."""
+        try:
+            from youtube_transcript_api import (
+                NoTranscriptFound,
+                TranscriptsDisabled,
+                YouTubeTranscriptApi,
+            )
+        except ImportError:
+            raise ImportError(
+                'Could not import "youtube_transcript_api" Python package. '
+                "Please install it with `pip install youtube-transcript-api`."
+            )
+        if self.proxy_url:
+            youtube_proxies = {
+                "http": self.proxy_url,
+                "https": self.proxy_url,
+            }
+            # Don't log complete URL because it might contain secrets
+            log.debug(f"Using proxy URL: {self.proxy_url[:14]}...")
+        else:
+            youtube_proxies = None
+        try:
+            transcript_list = YouTubeTranscriptApi.list_transcripts(
+                self.video_id, proxies=youtube_proxies
+            )
+        except Exception as e:
+            log.exception("Loading YouTube transcript failed")
+            return []
+        try:
+            transcript = transcript_list.find_transcript(self.language)
+        except NoTranscriptFound:
+            transcript = transcript_list.find_transcript(["en"])
+        transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
+        transcript = " ".join(
+            map(
+                lambda transcript_piece: transcript_piece["text"].strip(" "),
+                transcript_pieces,
+            )
+        )
+        return [Document(page_content=transcript, metadata=self._metadata)]

backend/open_webui/apps/retrieval/main.py ADDED Viewed

	@@ -0,0 +1,1494 @@

+# TODO: Merge this with the webui_app and make it a single app
+import json
+import logging
+import mimetypes
+import os
+import shutil
+import uuid
+from datetime import datetime
+from pathlib import Path
+from typing import Iterator, Optional, Sequence, Union
+from fastapi import Depends, FastAPI, File, Form, HTTPException, UploadFile, status
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+import tiktoken
+from open_webui.storage.provider import Storage
+from open_webui.apps.webui.models.knowledge import Knowledges
+from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT
+# Document loaders
+from open_webui.apps.retrieval.loaders.main import Loader
+from open_webui.apps.retrieval.loaders.youtube import YoutubeLoader
+# Web search engines
+from open_webui.apps.retrieval.web.main import SearchResult
+from open_webui.apps.retrieval.web.utils import get_web_loader
+from open_webui.apps.retrieval.web.brave import search_brave
+from open_webui.apps.retrieval.web.mojeek import search_mojeek
+from open_webui.apps.retrieval.web.duckduckgo import search_duckduckgo
+from open_webui.apps.retrieval.web.google_pse import search_google_pse
+from open_webui.apps.retrieval.web.jina_search import search_jina
+from open_webui.apps.retrieval.web.searchapi import search_searchapi
+from open_webui.apps.retrieval.web.searxng import search_searxng
+from open_webui.apps.retrieval.web.serper import search_serper
+from open_webui.apps.retrieval.web.serply import search_serply
+from open_webui.apps.retrieval.web.serpstack import search_serpstack
+from open_webui.apps.retrieval.web.tavily import search_tavily
+from open_webui.apps.retrieval.web.bing import search_bing
+from open_webui.apps.retrieval.utils import (
+    get_embedding_function,
+    get_model_path,
+    query_collection,
+    query_collection_with_hybrid_search,
+    query_doc,
+    query_doc_with_hybrid_search,
+)
+from open_webui.apps.webui.models.files import Files
+from open_webui.config import (
+    BRAVE_SEARCH_API_KEY,
+    MOJEEK_SEARCH_API_KEY,
+    TIKTOKEN_ENCODING_NAME,
+    RAG_TEXT_SPLITTER,
+    CHUNK_OVERLAP,
+    CHUNK_SIZE,
+    CONTENT_EXTRACTION_ENGINE,
+    CORS_ALLOW_ORIGIN,
+    ENABLE_RAG_HYBRID_SEARCH,
+    ENABLE_RAG_LOCAL_WEB_FETCH,
+    ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
+    ENABLE_RAG_WEB_SEARCH,
+    ENV,
+    GOOGLE_PSE_API_KEY,
+    GOOGLE_PSE_ENGINE_ID,
+    PDF_EXTRACT_IMAGES,
+    RAG_EMBEDDING_ENGINE,
+    RAG_EMBEDDING_MODEL,
+    RAG_EMBEDDING_MODEL_AUTO_UPDATE,
+    RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
+    RAG_EMBEDDING_BATCH_SIZE,
+    RAG_FILE_MAX_COUNT,
+    RAG_FILE_MAX_SIZE,
+    RAG_OPENAI_API_BASE_URL,
+    RAG_OPENAI_API_KEY,
+    RAG_OLLAMA_BASE_URL,
+    RAG_OLLAMA_API_KEY,
+    RAG_RELEVANCE_THRESHOLD,
+    RAG_RERANKING_MODEL,
+    RAG_RERANKING_MODEL_AUTO_UPDATE,
+    RAG_RERANKING_MODEL_TRUST_REMOTE_CODE,
+    DEFAULT_RAG_TEMPLATE,
+    RAG_TEMPLATE,
+    RAG_TOP_K,
+    RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
+    RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
+    RAG_WEB_SEARCH_ENGINE,
+    RAG_WEB_SEARCH_RESULT_COUNT,
+    JINA_API_KEY,
+    SEARCHAPI_API_KEY,
+    SEARCHAPI_ENGINE,
+    SEARXNG_QUERY_URL,
+    SERPER_API_KEY,
+    SERPLY_API_KEY,
+    SERPSTACK_API_KEY,
+    SERPSTACK_HTTPS,
+    TAVILY_API_KEY,
+    BING_SEARCH_V7_ENDPOINT,
+    BING_SEARCH_V7_SUBSCRIPTION_KEY,
+    TIKA_SERVER_URL,
+    UPLOAD_DIR,
+    YOUTUBE_LOADER_LANGUAGE,
+    YOUTUBE_LOADER_PROXY_URL,
+    DEFAULT_LOCALE,
+    AppConfig,
+)
+from open_webui.constants import ERROR_MESSAGES
+from open_webui.env import (
+    SRC_LOG_LEVELS,
+    DEVICE_TYPE,
+    DOCKER,
+)
+from open_webui.utils.misc import (
+    calculate_sha256,
+    calculate_sha256_string,
+    extract_folders_after_data_docs,
+    sanitize_filename,
+)
+from open_webui.utils.utils import get_admin_user, get_verified_user
+from langchain.text_splitter import RecursiveCharacterTextSplitter, TokenTextSplitter
+from langchain_core.documents import Document
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["RAG"])
+app = FastAPI(
+    docs_url="/docs" if ENV == "dev" else None,
+    openapi_url="/openapi.json" if ENV == "dev" else None,
+    redoc_url=None,
+)
+app.state.config = AppConfig()
+app.state.config.TOP_K = RAG_TOP_K
+app.state.config.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD
+app.state.config.FILE_MAX_SIZE = RAG_FILE_MAX_SIZE
+app.state.config.FILE_MAX_COUNT = RAG_FILE_MAX_COUNT
+app.state.config.ENABLE_RAG_HYBRID_SEARCH = ENABLE_RAG_HYBRID_SEARCH
+app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
+    ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
+)
+app.state.config.CONTENT_EXTRACTION_ENGINE = CONTENT_EXTRACTION_ENGINE
+app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL
+app.state.config.TEXT_SPLITTER = RAG_TEXT_SPLITTER
+app.state.config.TIKTOKEN_ENCODING_NAME = TIKTOKEN_ENCODING_NAME
+app.state.config.CHUNK_SIZE = CHUNK_SIZE
+app.state.config.CHUNK_OVERLAP = CHUNK_OVERLAP
+app.state.config.RAG_EMBEDDING_ENGINE = RAG_EMBEDDING_ENGINE
+app.state.config.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL
+app.state.config.RAG_EMBEDDING_BATCH_SIZE = RAG_EMBEDDING_BATCH_SIZE
+app.state.config.RAG_RERANKING_MODEL = RAG_RERANKING_MODEL
+app.state.config.RAG_TEMPLATE = RAG_TEMPLATE
+app.state.config.OPENAI_API_BASE_URL = RAG_OPENAI_API_BASE_URL
+app.state.config.OPENAI_API_KEY = RAG_OPENAI_API_KEY
+app.state.config.OLLAMA_BASE_URL = RAG_OLLAMA_BASE_URL
+app.state.config.OLLAMA_API_KEY = RAG_OLLAMA_API_KEY
+app.state.config.PDF_EXTRACT_IMAGES = PDF_EXTRACT_IMAGES
+app.state.config.YOUTUBE_LOADER_LANGUAGE = YOUTUBE_LOADER_LANGUAGE
+app.state.config.YOUTUBE_LOADER_PROXY_URL = YOUTUBE_LOADER_PROXY_URL
+app.state.YOUTUBE_LOADER_TRANSLATION = None
+app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH
+app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE
+app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST
+app.state.config.SEARXNG_QUERY_URL = SEARXNG_QUERY_URL
+app.state.config.GOOGLE_PSE_API_KEY = GOOGLE_PSE_API_KEY
+app.state.config.GOOGLE_PSE_ENGINE_ID = GOOGLE_PSE_ENGINE_ID
+app.state.config.BRAVE_SEARCH_API_KEY = BRAVE_SEARCH_API_KEY
+app.state.config.MOJEEK_SEARCH_API_KEY = MOJEEK_SEARCH_API_KEY
+app.state.config.SERPSTACK_API_KEY = SERPSTACK_API_KEY
+app.state.config.SERPSTACK_HTTPS = SERPSTACK_HTTPS
+app.state.config.SERPER_API_KEY = SERPER_API_KEY
+app.state.config.SERPLY_API_KEY = SERPLY_API_KEY
+app.state.config.TAVILY_API_KEY = TAVILY_API_KEY
+app.state.config.SEARCHAPI_API_KEY = SEARCHAPI_API_KEY
+app.state.config.SEARCHAPI_ENGINE = SEARCHAPI_ENGINE
+app.state.config.JINA_API_KEY = JINA_API_KEY
+app.state.config.BING_SEARCH_V7_ENDPOINT = BING_SEARCH_V7_ENDPOINT
+app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = BING_SEARCH_V7_SUBSCRIPTION_KEY
+app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT
+app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS
+def update_embedding_model(
+    embedding_model: str,
+    auto_update: bool = False,
+):
+    if embedding_model and app.state.config.RAG_EMBEDDING_ENGINE == "":
+        from sentence_transformers import SentenceTransformer
+        try:
+            app.state.sentence_transformer_ef = SentenceTransformer(
+                get_model_path(embedding_model, auto_update),
+                device=DEVICE_TYPE,
+                trust_remote_code=RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
+            )
+        except Exception as e:
+            log.debug(f"Error loading SentenceTransformer: {e}")
+            app.state.sentence_transformer_ef = None
+    else:
+        app.state.sentence_transformer_ef = None
+def update_reranking_model(
+    reranking_model: str,
+    auto_update: bool = False,
+):
+    if reranking_model:
+        if any(model in reranking_model for model in ["jinaai/jina-colbert-v2"]):
+            try:
+                from open_webui.apps.retrieval.models.colbert import ColBERT
+                app.state.sentence_transformer_rf = ColBERT(
+                    get_model_path(reranking_model, auto_update),
+                    env="docker" if DOCKER else None,
+                )
+            except Exception as e:
+                log.error(f"ColBERT: {e}")
+                app.state.sentence_transformer_rf = None
+                app.state.config.ENABLE_RAG_HYBRID_SEARCH = False
+        else:
+            import sentence_transformers
+            try:
+                app.state.sentence_transformer_rf = sentence_transformers.CrossEncoder(
+                    get_model_path(reranking_model, auto_update),
+                    device=DEVICE_TYPE,
+                    trust_remote_code=RAG_RERANKING_MODEL_TRUST_REMOTE_CODE,
+                )
+            except:
+                log.error("CrossEncoder error")
+                app.state.sentence_transformer_rf = None
+                app.state.config.ENABLE_RAG_HYBRID_SEARCH = False
+    else:
+        app.state.sentence_transformer_rf = None
+update_embedding_model(
+    app.state.config.RAG_EMBEDDING_MODEL,
+    RAG_EMBEDDING_MODEL_AUTO_UPDATE,
+)
+update_reranking_model(
+    app.state.config.RAG_RERANKING_MODEL,
+    RAG_RERANKING_MODEL_AUTO_UPDATE,
+)
+app.state.EMBEDDING_FUNCTION = get_embedding_function(
+    app.state.config.RAG_EMBEDDING_ENGINE,
+    app.state.config.RAG_EMBEDDING_MODEL,
+    app.state.sentence_transformer_ef,
+    (
+        app.state.config.OPENAI_API_BASE_URL
+        if app.state.config.RAG_EMBEDDING_ENGINE == "openai"
+        else app.state.config.OLLAMA_BASE_URL
+    ),
+    (
+        app.state.config.OPENAI_API_KEY
+        if app.state.config.RAG_EMBEDDING_ENGINE == "openai"
+        else app.state.config.OLLAMA_API_KEY
+    ),
+    app.state.config.RAG_EMBEDDING_BATCH_SIZE,
+)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=CORS_ALLOW_ORIGIN,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+class CollectionNameForm(BaseModel):
+    collection_name: Optional[str] = None
+class ProcessUrlForm(CollectionNameForm):
+    url: str
+class SearchForm(CollectionNameForm):
+    query: str
+@app.get("/")
+async def get_status():
+    return {
+        "status": True,
+        "chunk_size": app.state.config.CHUNK_SIZE,
+        "chunk_overlap": app.state.config.CHUNK_OVERLAP,
+        "template": app.state.config.RAG_TEMPLATE,
+        "embedding_engine": app.state.config.RAG_EMBEDDING_ENGINE,
+        "embedding_model": app.state.config.RAG_EMBEDDING_MODEL,
+        "reranking_model": app.state.config.RAG_RERANKING_MODEL,
+        "embedding_batch_size": app.state.config.RAG_EMBEDDING_BATCH_SIZE,
+    }
+@app.get("/embedding")
+async def get_embedding_config(user=Depends(get_admin_user)):
+    return {
+        "status": True,
+        "embedding_engine": app.state.config.RAG_EMBEDDING_ENGINE,
+        "embedding_model": app.state.config.RAG_EMBEDDING_MODEL,
+        "embedding_batch_size": app.state.config.RAG_EMBEDDING_BATCH_SIZE,
+        "openai_config": {
+            "url": app.state.config.OPENAI_API_BASE_URL,
+            "key": app.state.config.OPENAI_API_KEY,
+        },
+        "ollama_config": {
+            "url": app.state.config.OLLAMA_BASE_URL,
+            "key": app.state.config.OLLAMA_API_KEY,
+        },
+    }
+@app.get("/reranking")
+async def get_reraanking_config(user=Depends(get_admin_user)):
+    return {
+        "status": True,
+        "reranking_model": app.state.config.RAG_RERANKING_MODEL,
+    }
+class OpenAIConfigForm(BaseModel):
+    url: str
+    key: str
+class OllamaConfigForm(BaseModel):
+    url: str
+    key: str
+class EmbeddingModelUpdateForm(BaseModel):
+    openai_config: Optional[OpenAIConfigForm] = None
+    ollama_config: Optional[OllamaConfigForm] = None
+    embedding_engine: str
+    embedding_model: str
+    embedding_batch_size: Optional[int] = 1
+@app.post("/embedding/update")
+async def update_embedding_config(
+    form_data: EmbeddingModelUpdateForm, user=Depends(get_admin_user)
+):
+    log.info(
+        f"Updating embedding model: {app.state.config.RAG_EMBEDDING_MODEL} to {form_data.embedding_model}"
+    )
+    try:
+        app.state.config.RAG_EMBEDDING_ENGINE = form_data.embedding_engine
+        app.state.config.RAG_EMBEDDING_MODEL = form_data.embedding_model
+        if app.state.config.RAG_EMBEDDING_ENGINE in ["ollama", "openai"]:
+            if form_data.openai_config is not None:
+                app.state.config.OPENAI_API_BASE_URL = form_data.openai_config.url
+                app.state.config.OPENAI_API_KEY = form_data.openai_config.key
+            if form_data.ollama_config is not None:
+                app.state.config.OLLAMA_BASE_URL = form_data.ollama_config.url
+                app.state.config.OLLAMA_API_KEY = form_data.ollama_config.key
+            app.state.config.RAG_EMBEDDING_BATCH_SIZE = form_data.embedding_batch_size
+        update_embedding_model(app.state.config.RAG_EMBEDDING_MODEL)
+        app.state.EMBEDDING_FUNCTION = get_embedding_function(
+            app.state.config.RAG_EMBEDDING_ENGINE,
+            app.state.config.RAG_EMBEDDING_MODEL,
+            app.state.sentence_transformer_ef,
+            (
+                app.state.config.OPENAI_API_BASE_URL
+                if app.state.config.RAG_EMBEDDING_ENGINE == "openai"
+                else app.state.config.OLLAMA_BASE_URL
+            ),
+            (
+                app.state.config.OPENAI_API_KEY
+                if app.state.config.RAG_EMBEDDING_ENGINE == "openai"
+                else app.state.config.OLLAMA_API_KEY
+            ),
+            app.state.config.RAG_EMBEDDING_BATCH_SIZE,
+        )
+        return {
+            "status": True,
+            "embedding_engine": app.state.config.RAG_EMBEDDING_ENGINE,
+            "embedding_model": app.state.config.RAG_EMBEDDING_MODEL,
+            "embedding_batch_size": app.state.config.RAG_EMBEDDING_BATCH_SIZE,
+            "openai_config": {
+                "url": app.state.config.OPENAI_API_BASE_URL,
+                "key": app.state.config.OPENAI_API_KEY,
+            },
+            "ollama_config": {
+                "url": app.state.config.OLLAMA_BASE_URL,
+                "key": app.state.config.OLLAMA_API_KEY,
+            },
+        }
+    except Exception as e:
+        log.exception(f"Problem updating embedding model: {e}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=ERROR_MESSAGES.DEFAULT(e),
+        )
+class RerankingModelUpdateForm(BaseModel):
+    reranking_model: str
+@app.post("/reranking/update")
+async def update_reranking_config(
+    form_data: RerankingModelUpdateForm, user=Depends(get_admin_user)
+):
+    log.info(
+        f"Updating reranking model: {app.state.config.RAG_RERANKING_MODEL} to {form_data.reranking_model}"
+    )
+    try:
+        app.state.config.RAG_RERANKING_MODEL = form_data.reranking_model
+        update_reranking_model(app.state.config.RAG_RERANKING_MODEL, True)
+        return {
+            "status": True,
+            "reranking_model": app.state.config.RAG_RERANKING_MODEL,
+        }
+    except Exception as e:
+        log.exception(f"Problem updating reranking model: {e}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=ERROR_MESSAGES.DEFAULT(e),
+        )
+@app.get("/config")
+async def get_rag_config(user=Depends(get_admin_user)):
+    return {
+        "status": True,
+        "pdf_extract_images": app.state.config.PDF_EXTRACT_IMAGES,
+        "content_extraction": {
+            "engine": app.state.config.CONTENT_EXTRACTION_ENGINE,
+            "tika_server_url": app.state.config.TIKA_SERVER_URL,
+        },
+        "chunk": {
+            "text_splitter": app.state.config.TEXT_SPLITTER,
+            "chunk_size": app.state.config.CHUNK_SIZE,
+            "chunk_overlap": app.state.config.CHUNK_OVERLAP,
+        },
+        "file": {
+            "max_size": app.state.config.FILE_MAX_SIZE,
+            "max_count": app.state.config.FILE_MAX_COUNT,
+        },
+        "youtube": {
+            "language": app.state.config.YOUTUBE_LOADER_LANGUAGE,
+            "translation": app.state.YOUTUBE_LOADER_TRANSLATION,
+            "proxy_url": app.state.config.YOUTUBE_LOADER_PROXY_URL,
+        },
+        "web": {
+            "web_loader_ssl_verification": app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
+            "search": {
+                "enabled": app.state.config.ENABLE_RAG_WEB_SEARCH,
+                "engine": app.state.config.RAG_WEB_SEARCH_ENGINE,
+                "searxng_query_url": app.state.config.SEARXNG_QUERY_URL,
+                "google_pse_api_key": app.state.config.GOOGLE_PSE_API_KEY,
+                "google_pse_engine_id": app.state.config.GOOGLE_PSE_ENGINE_ID,
+                "brave_search_api_key": app.state.config.BRAVE_SEARCH_API_KEY,
+                "mojeek_search_api_key": app.state.config.MOJEEK_SEARCH_API_KEY,
+                "serpstack_api_key": app.state.config.SERPSTACK_API_KEY,
+                "serpstack_https": app.state.config.SERPSTACK_HTTPS,
+                "serper_api_key": app.state.config.SERPER_API_KEY,
+                "serply_api_key": app.state.config.SERPLY_API_KEY,
+                "tavily_api_key": app.state.config.TAVILY_API_KEY,
+                "searchapi_api_key": app.state.config.SEARCHAPI_API_KEY,
+                "seaarchapi_engine": app.state.config.SEARCHAPI_ENGINE,
+                "jina_api_key": app.state.config.JINA_API_KEY,
+                "bing_search_v7_endpoint": app.state.config.BING_SEARCH_V7_ENDPOINT,
+                "bing_search_v7_subscription_key": app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY,
+                "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+                "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
+            },
+        },
+    }
+class FileConfig(BaseModel):
+    max_size: Optional[int] = None
+    max_count: Optional[int] = None
+class ContentExtractionConfig(BaseModel):
+    engine: str = ""
+    tika_server_url: Optional[str] = None
+class ChunkParamUpdateForm(BaseModel):
+    text_splitter: Optional[str] = None
+    chunk_size: int
+    chunk_overlap: int
+class YoutubeLoaderConfig(BaseModel):
+    language: list[str]
+    translation: Optional[str] = None
+    proxy_url: str = ""
+class WebSearchConfig(BaseModel):
+    enabled: bool
+    engine: Optional[str] = None
+    searxng_query_url: Optional[str] = None
+    google_pse_api_key: Optional[str] = None
+    google_pse_engine_id: Optional[str] = None
+    brave_search_api_key: Optional[str] = None
+    mojeek_search_api_key: Optional[str] = None
+    serpstack_api_key: Optional[str] = None
+    serpstack_https: Optional[bool] = None
+    serper_api_key: Optional[str] = None
+    serply_api_key: Optional[str] = None
+    tavily_api_key: Optional[str] = None
+    searchapi_api_key: Optional[str] = None
+    searchapi_engine: Optional[str] = None
+    jina_api_key: Optional[str] = None
+    bing_search_v7_endpoint: Optional[str] = None
+    bing_search_v7_subscription_key: Optional[str] = None
+    result_count: Optional[int] = None
+    concurrent_requests: Optional[int] = None
+class WebConfig(BaseModel):
+    search: WebSearchConfig
+    web_loader_ssl_verification: Optional[bool] = None
+class ConfigUpdateForm(BaseModel):
+    pdf_extract_images: Optional[bool] = None
+    file: Optional[FileConfig] = None
+    content_extraction: Optional[ContentExtractionConfig] = None
+    chunk: Optional[ChunkParamUpdateForm] = None
+    youtube: Optional[YoutubeLoaderConfig] = None
+    web: Optional[WebConfig] = None
+@app.post("/config/update")
+async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_user)):
+    app.state.config.PDF_EXTRACT_IMAGES = (
+        form_data.pdf_extract_images
+        if form_data.pdf_extract_images is not None
+        else app.state.config.PDF_EXTRACT_IMAGES
+    )
+    if form_data.file is not None:
+        app.state.config.FILE_MAX_SIZE = form_data.file.max_size
+        app.state.config.FILE_MAX_COUNT = form_data.file.max_count
+    if form_data.content_extraction is not None:
+        log.info(f"Updating text settings: {form_data.content_extraction}")
+        app.state.config.CONTENT_EXTRACTION_ENGINE = form_data.content_extraction.engine
+        app.state.config.TIKA_SERVER_URL = form_data.content_extraction.tika_server_url
+    if form_data.chunk is not None:
+        app.state.config.TEXT_SPLITTER = form_data.chunk.text_splitter
+        app.state.config.CHUNK_SIZE = form_data.chunk.chunk_size
+        app.state.config.CHUNK_OVERLAP = form_data.chunk.chunk_overlap
+    if form_data.youtube is not None:
+        app.state.config.YOUTUBE_LOADER_LANGUAGE = form_data.youtube.language
+        app.state.config.YOUTUBE_LOADER_PROXY_URL = form_data.youtube.proxy_url
+        app.state.YOUTUBE_LOADER_TRANSLATION = form_data.youtube.translation
+    if form_data.web is not None:
+        app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
+            # Note: When UI "Bypass SSL verification for Websites"=True then ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION=False
+            form_data.web.web_loader_ssl_verification
+        )
+        app.state.config.ENABLE_RAG_WEB_SEARCH = form_data.web.search.enabled
+        app.state.config.RAG_WEB_SEARCH_ENGINE = form_data.web.search.engine
+        app.state.config.SEARXNG_QUERY_URL = form_data.web.search.searxng_query_url
+        app.state.config.GOOGLE_PSE_API_KEY = form_data.web.search.google_pse_api_key
+        app.state.config.GOOGLE_PSE_ENGINE_ID = (
+            form_data.web.search.google_pse_engine_id
+        )
+        app.state.config.BRAVE_SEARCH_API_KEY = (
+            form_data.web.search.brave_search_api_key
+        )
+        app.state.config.MOJEEK_SEARCH_API_KEY = (
+            form_data.web.search.mojeek_search_api_key
+        )
+        app.state.config.SERPSTACK_API_KEY = form_data.web.search.serpstack_api_key
+        app.state.config.SERPSTACK_HTTPS = form_data.web.search.serpstack_https
+        app.state.config.SERPER_API_KEY = form_data.web.search.serper_api_key
+        app.state.config.SERPLY_API_KEY = form_data.web.search.serply_api_key
+        app.state.config.TAVILY_API_KEY = form_data.web.search.tavily_api_key
+        app.state.config.SEARCHAPI_API_KEY = form_data.web.search.searchapi_api_key
+        app.state.config.SEARCHAPI_ENGINE = form_data.web.search.searchapi_engine
+        app.state.config.JINA_API_KEY = form_data.web.search.jina_api_key
+        app.state.config.BING_SEARCH_V7_ENDPOINT = (
+            form_data.web.search.bing_search_v7_endpoint
+        )
+        app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = (
+            form_data.web.search.bing_search_v7_subscription_key
+        )
+        app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = form_data.web.search.result_count
+        app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = (
+            form_data.web.search.concurrent_requests
+        )
+    return {
+        "status": True,
+        "pdf_extract_images": app.state.config.PDF_EXTRACT_IMAGES,
+        "file": {
+            "max_size": app.state.config.FILE_MAX_SIZE,
+            "max_count": app.state.config.FILE_MAX_COUNT,
+        },
+        "content_extraction": {
+            "engine": app.state.config.CONTENT_EXTRACTION_ENGINE,
+            "tika_server_url": app.state.config.TIKA_SERVER_URL,
+        },
+        "chunk": {
+            "text_splitter": app.state.config.TEXT_SPLITTER,
+            "chunk_size": app.state.config.CHUNK_SIZE,
+            "chunk_overlap": app.state.config.CHUNK_OVERLAP,
+        },
+        "youtube": {
+            "language": app.state.config.YOUTUBE_LOADER_LANGUAGE,
+            "proxy_url": app.state.config.YOUTUBE_LOADER_PROXY_URL,
+            "translation": app.state.YOUTUBE_LOADER_TRANSLATION,
+        },
+        "web": {
+            "web_loader_ssl_verification": app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
+            "search": {
+                "enabled": app.state.config.ENABLE_RAG_WEB_SEARCH,
+                "engine": app.state.config.RAG_WEB_SEARCH_ENGINE,
+                "searxng_query_url": app.state.config.SEARXNG_QUERY_URL,
+                "google_pse_api_key": app.state.config.GOOGLE_PSE_API_KEY,
+                "google_pse_engine_id": app.state.config.GOOGLE_PSE_ENGINE_ID,
+                "brave_search_api_key": app.state.config.BRAVE_SEARCH_API_KEY,
+                "mojeek_search_api_key": app.state.config.MOJEEK_SEARCH_API_KEY,
+                "serpstack_api_key": app.state.config.SERPSTACK_API_KEY,
+                "serpstack_https": app.state.config.SERPSTACK_HTTPS,
+                "serper_api_key": app.state.config.SERPER_API_KEY,
+                "serply_api_key": app.state.config.SERPLY_API_KEY,
+                "serachapi_api_key": app.state.config.SEARCHAPI_API_KEY,
+                "searchapi_engine": app.state.config.SEARCHAPI_ENGINE,
+                "tavily_api_key": app.state.config.TAVILY_API_KEY,
+                "jina_api_key": app.state.config.JINA_API_KEY,
+                "bing_search_v7_endpoint": app.state.config.BING_SEARCH_V7_ENDPOINT,
+                "bing_search_v7_subscription_key": app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY,
+                "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+                "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
+            },
+        },
+    }
+@app.get("/template")
+async def get_rag_template(user=Depends(get_verified_user)):
+    return {
+        "status": True,
+        "template": app.state.config.RAG_TEMPLATE,
+    }
+@app.get("/query/settings")
+async def get_query_settings(user=Depends(get_admin_user)):
+    return {
+        "status": True,
+        "template": app.state.config.RAG_TEMPLATE,
+        "k": app.state.config.TOP_K,
+        "r": app.state.config.RELEVANCE_THRESHOLD,
+        "hybrid": app.state.config.ENABLE_RAG_HYBRID_SEARCH,
+    }
+class QuerySettingsForm(BaseModel):
+    k: Optional[int] = None
+    r: Optional[float] = None
+    template: Optional[str] = None
+    hybrid: Optional[bool] = None
+@app.post("/query/settings/update")
+async def update_query_settings(
+    form_data: QuerySettingsForm, user=Depends(get_admin_user)
+):
+    app.state.config.RAG_TEMPLATE = form_data.template
+    app.state.config.TOP_K = form_data.k if form_data.k else 4
+    app.state.config.RELEVANCE_THRESHOLD = form_data.r if form_data.r else 0.0
+    app.state.config.ENABLE_RAG_HYBRID_SEARCH = (
+        form_data.hybrid if form_data.hybrid else False
+    )
+    return {
+        "status": True,
+        "template": app.state.config.RAG_TEMPLATE,
+        "k": app.state.config.TOP_K,
+        "r": app.state.config.RELEVANCE_THRESHOLD,
+        "hybrid": app.state.config.ENABLE_RAG_HYBRID_SEARCH,
+    }
+####################################
+#
+# Document process and retrieval
+#
+####################################
+def _get_docs_info(docs: list[Document]) -> str:
+    docs_info = set()
+    # Trying to select relevant metadata identifying the document.
+    for doc in docs:
+        metadata = getattr(doc, "metadata", {})
+        doc_name = metadata.get("name", "")
+        if not doc_name:
+            doc_name = metadata.get("title", "")
+        if not doc_name:
+            doc_name = metadata.get("source", "")
+        if doc_name:
+            docs_info.add(doc_name)
+    return ", ".join(docs_info)
+def save_docs_to_vector_db(
+    docs,
+    collection_name,
+    metadata: Optional[dict] = None,
+    overwrite: bool = False,
+    split: bool = True,
+    add: bool = False,
+) -> bool:
+    log.info(
+        f"save_docs_to_vector_db: document {_get_docs_info(docs)} {collection_name}"
+    )
+    # Check if entries with the same hash (metadata.hash) already exist
+    if metadata and "hash" in metadata:
+        result = VECTOR_DB_CLIENT.query(
+            collection_name=collection_name,
+            filter={"hash": metadata["hash"]},
+        )
+        if result is not None:
+            existing_doc_ids = result.ids[0]
+            if existing_doc_ids:
+                log.info(f"Document with hash {metadata['hash']} already exists")
+                raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT)
+    if split:
+        if app.state.config.TEXT_SPLITTER in ["", "character"]:
+            text_splitter = RecursiveCharacterTextSplitter(
+                chunk_size=app.state.config.CHUNK_SIZE,
+                chunk_overlap=app.state.config.CHUNK_OVERLAP,
+                add_start_index=True,
+            )
+        elif app.state.config.TEXT_SPLITTER == "token":
+            log.info(
+                f"Using token text splitter: {app.state.config.TIKTOKEN_ENCODING_NAME}"
+            )
+            tiktoken.get_encoding(str(app.state.config.TIKTOKEN_ENCODING_NAME))
+            text_splitter = TokenTextSplitter(
+                encoding_name=str(app.state.config.TIKTOKEN_ENCODING_NAME),
+                chunk_size=app.state.config.CHUNK_SIZE,
+                chunk_overlap=app.state.config.CHUNK_OVERLAP,
+                add_start_index=True,
+            )
+        else:
+            raise ValueError(ERROR_MESSAGES.DEFAULT("Invalid text splitter"))
+        docs = text_splitter.split_documents(docs)
+    if len(docs) == 0:
+        raise ValueError(ERROR_MESSAGES.EMPTY_CONTENT)
+    texts = [doc.page_content for doc in docs]
+    metadatas = [
+        {
+            **doc.metadata,
+            **(metadata if metadata else {}),
+            "embedding_config": json.dumps(
+                {
+                    "engine": app.state.config.RAG_EMBEDDING_ENGINE,
+                    "model": app.state.config.RAG_EMBEDDING_MODEL,
+                }
+            ),
+        }
+        for doc in docs
+    ]
+    # ChromaDB does not like datetime formats
+    # for meta-data so convert them to string.
+    for metadata in metadatas:
+        for key, value in metadata.items():
+            if isinstance(value, datetime):
+                metadata[key] = str(value)
+    try:
+        if VECTOR_DB_CLIENT.has_collection(collection_name=collection_name):
+            log.info(f"collection {collection_name} already exists")
+            if overwrite:
+                VECTOR_DB_CLIENT.delete_collection(collection_name=collection_name)
+                log.info(f"deleting existing collection {collection_name}")
+            elif add is False:
+                log.info(
+                    f"collection {collection_name} already exists, overwrite is False and add is False"
+                )
+                return True
+        log.info(f"adding to collection {collection_name}")
+        embedding_function = get_embedding_function(
+            app.state.config.RAG_EMBEDDING_ENGINE,
+            app.state.config.RAG_EMBEDDING_MODEL,
+            app.state.sentence_transformer_ef,
+            (
+                app.state.config.OPENAI_API_BASE_URL
+                if app.state.config.RAG_EMBEDDING_ENGINE == "openai"
+                else app.state.config.OLLAMA_BASE_URL
+            ),
+            (
+                app.state.config.OPENAI_API_KEY
+                if app.state.config.RAG_EMBEDDING_ENGINE == "openai"
+                else app.state.config.OLLAMA_API_KEY
+            ),
+            app.state.config.RAG_EMBEDDING_BATCH_SIZE,
+        )
+        embeddings = embedding_function(
+            list(map(lambda x: x.replace("\n", " "), texts))
+        )
+        items = [
+            {
+                "id": str(uuid.uuid4()),
+                "text": text,
+                "vector": embeddings[idx],
+                "metadata": metadatas[idx],
+            }
+            for idx, text in enumerate(texts)
+        ]
+        VECTOR_DB_CLIENT.insert(
+            collection_name=collection_name,
+            items=items,
+        )
+        return True
+    except Exception as e:
+        log.exception(e)
+        raise e
+class ProcessFileForm(BaseModel):
+    file_id: str
+    content: Optional[str] = None
+    collection_name: Optional[str] = None
+@app.post("/process/file")
+def process_file(
+    form_data: ProcessFileForm,
+    user=Depends(get_verified_user),
+):
+    try:
+        file = Files.get_file_by_id(form_data.file_id)
+        collection_name = form_data.collection_name
+        if collection_name is None:
+            collection_name = f"file-{file.id}"
+        if form_data.content:
+            # Update the content in the file
+            # Usage: /files/{file_id}/data/content/update
+            VECTOR_DB_CLIENT.delete_collection(collection_name=f"file-{file.id}")
+            docs = [
+                Document(
+                    page_content=form_data.content.replace("<br/>", "\n"),
+                    metadata={
+                        **file.meta,
+                        "name": file.filename,
+                        "created_by": file.user_id,
+                        "file_id": file.id,
+                        "source": file.filename,
+                    },
+                )
+            ]
+            text_content = form_data.content
+        elif form_data.collection_name:
+            # Check if the file has already been processed and save the content
+            # Usage: /knowledge/{id}/file/add, /knowledge/{id}/file/update
+            result = VECTOR_DB_CLIENT.query(
+                collection_name=f"file-{file.id}", filter={"file_id": file.id}
+            )
+            if result is not None and len(result.ids[0]) > 0:
+                docs = [
+                    Document(
+                        page_content=result.documents[0][idx],
+                        metadata=result.metadatas[0][idx],
+                    )
+                    for idx, id in enumerate(result.ids[0])
+                ]
+            else:
+                docs = [
+                    Document(
+                        page_content=file.data.get("content", ""),
+                        metadata={
+                            **file.meta,
+                            "name": file.filename,
+                            "created_by": file.user_id,
+                            "file_id": file.id,
+                            "source": file.filename,
+                        },
+                    )
+                ]
+            text_content = file.data.get("content", "")
+        else:
+            # Process the file and save the content
+            # Usage: /files/
+            file_path = file.path
+            if file_path:
+                file_path = Storage.get_file(file_path)
+                loader = Loader(
+                    engine=app.state.config.CONTENT_EXTRACTION_ENGINE,
+                    TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL,
+                    PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES,
+                )
+                docs = loader.load(
+                    file.filename, file.meta.get("content_type"), file_path
+                )
+                docs = [
+                    Document(
+                        page_content=doc.page_content,
+                        metadata={
+                            **doc.metadata,
+                            "name": file.filename,
+                            "created_by": file.user_id,
+                            "file_id": file.id,
+                            "source": file.filename,
+                        },
+                    )
+                    for doc in docs
+                ]
+            else:
+                docs = [
+                    Document(
+                        page_content=file.data.get("content", ""),
+                        metadata={
+                            **file.meta,
+                            "name": file.filename,
+                            "created_by": file.user_id,
+                            "file_id": file.id,
+                            "source": file.filename,
+                        },
+                    )
+                ]
+            text_content = " ".join([doc.page_content for doc in docs])
+        log.debug(f"text_content: {text_content}")
+        Files.update_file_data_by_id(
+            file.id,
+            {"content": text_content},
+        )
+        hash = calculate_sha256_string(text_content)
+        Files.update_file_hash_by_id(file.id, hash)
+        try:
+            result = save_docs_to_vector_db(
+                docs=docs,
+                collection_name=collection_name,
+                metadata={
+                    "file_id": file.id,
+                    "name": file.filename,
+                    "hash": hash,
+                },
+                add=(True if form_data.collection_name else False),
+            )
+            if result:
+                Files.update_file_metadata_by_id(
+                    file.id,
+                    {
+                        "collection_name": collection_name,
+                    },
+                )
+                return {
+                    "status": True,
+                    "collection_name": collection_name,
+                    "filename": file.filename,
+                    "content": text_content,
+                }
+        except Exception as e:
+            raise e
+    except Exception as e:
+        log.exception(e)
+        if "No pandoc was found" in str(e):
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail=ERROR_MESSAGES.PANDOC_NOT_INSTALLED,
+            )
+        else:
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail=str(e),
+            )
+class ProcessTextForm(BaseModel):
+    name: str
+    content: str
+    collection_name: Optional[str] = None
+@app.post("/process/text")
+def process_text(
+    form_data: ProcessTextForm,
+    user=Depends(get_verified_user),
+):
+    collection_name = form_data.collection_name
+    if collection_name is None:
+        collection_name = calculate_sha256_string(form_data.content)
+    docs = [
+        Document(
+            page_content=form_data.content,
+            metadata={"name": form_data.name, "created_by": user.id},
+        )
+    ]
+    text_content = form_data.content
+    log.debug(f"text_content: {text_content}")
+    result = save_docs_to_vector_db(docs, collection_name)
+    if result:
+        return {
+            "status": True,
+            "collection_name": collection_name,
+            "content": text_content,
+        }
+    else:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=ERROR_MESSAGES.DEFAULT(),
+        )
+@app.post("/process/youtube")
+def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_user)):
+    try:
+        collection_name = form_data.collection_name
+        if not collection_name:
+            collection_name = calculate_sha256_string(form_data.url)[:63]
+        loader = YoutubeLoader(
+            form_data.url,
+            language=app.state.config.YOUTUBE_LOADER_LANGUAGE,
+            proxy_url=app.state.config.YOUTUBE_LOADER_PROXY_URL,
+        )
+        docs = loader.load()
+        content = " ".join([doc.page_content for doc in docs])
+        log.debug(f"text_content: {content}")
+        save_docs_to_vector_db(docs, collection_name, overwrite=True)
+        return {
+            "status": True,
+            "collection_name": collection_name,
+            "filename": form_data.url,
+            "file": {
+                "data": {
+                    "content": content,
+                },
+                "meta": {
+                    "name": form_data.url,
+                },
+            },
+        }
+    except Exception as e:
+        log.exception(e)
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=ERROR_MESSAGES.DEFAULT(e),
+        )
+@app.post("/process/web")
+def process_web(form_data: ProcessUrlForm, user=Depends(get_verified_user)):
+    try:
+        collection_name = form_data.collection_name
+        if not collection_name:
+            collection_name = calculate_sha256_string(form_data.url)[:63]
+        loader = get_web_loader(
+            form_data.url,
+            verify_ssl=app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
+            requests_per_second=app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
+        )
+        docs = loader.load()
+        content = " ".join([doc.page_content for doc in docs])
+        log.debug(f"text_content: {content}")
+        save_docs_to_vector_db(docs, collection_name, overwrite=True)
+        return {
+            "status": True,
+            "collection_name": collection_name,
+            "filename": form_data.url,
+            "file": {
+                "data": {
+                    "content": content,
+                },
+                "meta": {
+                    "name": form_data.url,
+                },
+            },
+        }
+    except Exception as e:
+        log.exception(e)
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=ERROR_MESSAGES.DEFAULT(e),
+        )
+def search_web(engine: str, query: str) -> list[SearchResult]:
+    """Search the web using a search engine and return the results as a list of SearchResult objects.
+    Will look for a search engine API key in environment variables in the following order:
+    - SEARXNG_QUERY_URL
+    - GOOGLE_PSE_API_KEY + GOOGLE_PSE_ENGINE_ID
+    - BRAVE_SEARCH_API_KEY
+    - MOJEEK_SEARCH_API_KEY
+    - SERPSTACK_API_KEY
+    - SERPER_API_KEY
+    - SERPLY_API_KEY
+    - TAVILY_API_KEY
+    - SEARCHAPI_API_KEY + SEARCHAPI_ENGINE (by default `google`)
+    Args:
+        query (str): The query to search for
+    """
+    # TODO: add playwright to search the web
+    if engine == "searxng":
+        if app.state.config.SEARXNG_QUERY_URL:
+            return search_searxng(
+                app.state.config.SEARXNG_QUERY_URL,
+                query,
+                app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+                app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
+            )
+        else:
+            raise Exception("No SEARXNG_QUERY_URL found in environment variables")
+    elif engine == "google_pse":
+        if (
+            app.state.config.GOOGLE_PSE_API_KEY
+            and app.state.config.GOOGLE_PSE_ENGINE_ID
+        ):
+            return search_google_pse(
+                app.state.config.GOOGLE_PSE_API_KEY,
+                app.state.config.GOOGLE_PSE_ENGINE_ID,
+                query,
+                app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+                app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
+            )
+        else:
+            raise Exception(
+                "No GOOGLE_PSE_API_KEY or GOOGLE_PSE_ENGINE_ID found in environment variables"
+            )
+    elif engine == "brave":
+        if app.state.config.BRAVE_SEARCH_API_KEY:
+            return search_brave(
+                app.state.config.BRAVE_SEARCH_API_KEY,
+                query,
+                app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+                app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
+            )
+        else:
+            raise Exception("No BRAVE_SEARCH_API_KEY found in environment variables")
+    elif engine == "mojeek":
+        if app.state.config.MOJEEK_SEARCH_API_KEY:
+            return search_mojeek(
+                app.state.config.MOJEEK_SEARCH_API_KEY,
+                query,
+                app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+                app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
+            )
+        else:
+            raise Exception("No MOJEEK_SEARCH_API_KEY found in environment variables")
+    elif engine == "serpstack":
+        if app.state.config.SERPSTACK_API_KEY:
+            return search_serpstack(
+                app.state.config.SERPSTACK_API_KEY,
+                query,
+                app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+                app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
+                https_enabled=app.state.config.SERPSTACK_HTTPS,
+            )
+        else:
+            raise Exception("No SERPSTACK_API_KEY found in environment variables")
+    elif engine == "serper":
+        if app.state.config.SERPER_API_KEY:
+            return search_serper(
+                app.state.config.SERPER_API_KEY,
+                query,
+                app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+                app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
+            )
+        else:
+            raise Exception("No SERPER_API_KEY found in environment variables")
+    elif engine == "serply":
+        if app.state.config.SERPLY_API_KEY:
+            return search_serply(
+                app.state.config.SERPLY_API_KEY,
+                query,
+                app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+                app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
+            )
+        else:
+            raise Exception("No SERPLY_API_KEY found in environment variables")
+    elif engine == "duckduckgo":
+        return search_duckduckgo(
+            query,
+            app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+            app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
+        )
+    elif engine == "tavily":
+        if app.state.config.TAVILY_API_KEY:
+            return search_tavily(
+                app.state.config.TAVILY_API_KEY,
+                query,
+                app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+            )
+        else:
+            raise Exception("No TAVILY_API_KEY found in environment variables")
+    elif engine == "searchapi":
+        if app.state.config.SEARCHAPI_API_KEY:
+            return search_searchapi(
+                app.state.config.SEARCHAPI_API_KEY,
+                app.state.config.SEARCHAPI_ENGINE,
+                query,
+                app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+                app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
+            )
+        else:
+            raise Exception("No SEARCHAPI_API_KEY found in environment variables")
+    elif engine == "jina":
+        return search_jina(
+            app.state.config.JINA_API_KEY,
+            query,
+            app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+        )
+    elif engine == "bing":
+        return search_bing(
+            app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY,
+            app.state.config.BING_SEARCH_V7_ENDPOINT,
+            str(DEFAULT_LOCALE),
+            query,
+            app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+            app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
+        )
+    else:
+        raise Exception("No search engine API key found in environment variables")
+@app.post("/process/web/search")
+def process_web_search(form_data: SearchForm, user=Depends(get_verified_user)):
+    try:
+        logging.info(
+            f"trying to web search with {app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query}"
+        )
+        web_results = search_web(
+            app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query
+        )
+    except Exception as e:
+        log.exception(e)
+        print(e)
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=ERROR_MESSAGES.WEB_SEARCH_ERROR(e),
+        )
+    try:
+        collection_name = form_data.collection_name
+        if collection_name == "":
+            collection_name = calculate_sha256_string(form_data.query)[:63]
+        urls = [result.link for result in web_results]
+        loader = get_web_loader(
+            urls,
+            verify_ssl=app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
+            requests_per_second=app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
+        )
+        docs = loader.aload()
+        save_docs_to_vector_db(docs, collection_name, overwrite=True)
+        return {
+            "status": True,
+            "collection_name": collection_name,
+            "filenames": urls,
+        }
+    except Exception as e:
+        log.exception(e)
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=ERROR_MESSAGES.DEFAULT(e),
+        )
+class QueryDocForm(BaseModel):
+    collection_name: str
+    query: str
+    k: Optional[int] = None
+    r: Optional[float] = None
+    hybrid: Optional[bool] = None
+@app.post("/query/doc")
+def query_doc_handler(
+    form_data: QueryDocForm,
+    user=Depends(get_verified_user),
+):
+    try:
+        if app.state.config.ENABLE_RAG_HYBRID_SEARCH:
+            return query_doc_with_hybrid_search(
+                collection_name=form_data.collection_name,
+                query=form_data.query,
+                embedding_function=app.state.EMBEDDING_FUNCTION,
+                k=form_data.k if form_data.k else app.state.config.TOP_K,
+                reranking_function=app.state.sentence_transformer_rf,
+                r=(
+                    form_data.r if form_data.r else app.state.config.RELEVANCE_THRESHOLD
+                ),
+            )
+        else:
+            return query_doc(
+                collection_name=form_data.collection_name,
+                query=form_data.query,
+                embedding_function=app.state.EMBEDDING_FUNCTION,
+                k=form_data.k if form_data.k else app.state.config.TOP_K,
+            )
+    except Exception as e:
+        log.exception(e)
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=ERROR_MESSAGES.DEFAULT(e),
+        )
+class QueryCollectionsForm(BaseModel):
+    collection_names: list[str]
+    query: str
+    k: Optional[int] = None
+    r: Optional[float] = None
+    hybrid: Optional[bool] = None
+@app.post("/query/collection")
+def query_collection_handler(
+    form_data: QueryCollectionsForm,
+    user=Depends(get_verified_user),
+):
+    try:
+        if app.state.config.ENABLE_RAG_HYBRID_SEARCH:
+            return query_collection_with_hybrid_search(
+                collection_names=form_data.collection_names,
+                query=form_data.query,
+                embedding_function=app.state.EMBEDDING_FUNCTION,
+                k=form_data.k if form_data.k else app.state.config.TOP_K,
+                reranking_function=app.state.sentence_transformer_rf,
+                r=(
+                    form_data.r if form_data.r else app.state.config.RELEVANCE_THRESHOLD
+                ),
+            )
+        else:
+            return query_collection(
+                collection_names=form_data.collection_names,
+                query=form_data.query,
+                embedding_function=app.state.EMBEDDING_FUNCTION,
+                k=form_data.k if form_data.k else app.state.config.TOP_K,
+            )
+    except Exception as e:
+        log.exception(e)
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=ERROR_MESSAGES.DEFAULT(e),
+        )
+####################################
+#
+# Vector DB operations
+#
+####################################
+class DeleteForm(BaseModel):
+    collection_name: str
+    file_id: str
+@app.post("/delete")
+def delete_entries_from_collection(form_data: DeleteForm, user=Depends(get_admin_user)):
+    try:
+        if VECTOR_DB_CLIENT.has_collection(collection_name=form_data.collection_name):
+            file = Files.get_file_by_id(form_data.file_id)
+            hash = file.hash
+            VECTOR_DB_CLIENT.delete(
+                collection_name=form_data.collection_name,
+                metadata={"hash": hash},
+            )
+            return {"status": True}
+        else:
+            return {"status": False}
+    except Exception as e:
+        log.exception(e)
+        return {"status": False}
+@app.post("/reset/db")
+def reset_vector_db(user=Depends(get_admin_user)):
+    VECTOR_DB_CLIENT.reset()
+    Knowledges.delete_all_knowledge()
+@app.post("/reset/uploads")
+def reset_upload_dir(user=Depends(get_admin_user)) -> bool:
+    folder = f"{UPLOAD_DIR}"
+    try:
+        # Check if the directory exists
+        if os.path.exists(folder):
+            # Iterate over all the files and directories in the specified directory
+            for filename in os.listdir(folder):
+                file_path = os.path.join(folder, filename)
+                try:
+                    if os.path.isfile(file_path) or os.path.islink(file_path):
+                        os.unlink(file_path)  # Remove the file or link
+                    elif os.path.isdir(file_path):
+                        shutil.rmtree(file_path)  # Remove the directory
+                except Exception as e:
+                    print(f"Failed to delete {file_path}. Reason: {e}")
+        else:
+            print(f"The directory {folder} does not exist")
+    except Exception as e:
+        print(f"Failed to process the directory {folder}. Reason: {e}")
+    return True
+if ENV == "dev":
+    @app.get("/ef")
+    async def get_embeddings():
+        return {"result": app.state.EMBEDDING_FUNCTION("hello world")}
+    @app.get("/ef/{text}")
+    async def get_embeddings_text(text: str):
+        return {"result": app.state.EMBEDDING_FUNCTION(text)}

backend/open_webui/apps/retrieval/models/colbert.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import os
+import torch
+import numpy as np
+from colbert.infra import ColBERTConfig
+from colbert.modeling.checkpoint import Checkpoint
+class ColBERT:
+    def __init__(self, name, **kwargs) -> None:
+        print("ColBERT: Loading model", name)
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        DOCKER = kwargs.get("env") == "docker"
+        if DOCKER:
+            # This is a workaround for the issue with the docker container
+            # where the torch extension is not loaded properly
+            # and the following error is thrown:
+            # /root/.cache/torch_extensions/py311_cpu/segmented_maxsim_cpp/segmented_maxsim_cpp.so: cannot open shared object file: No such file or directory
+            lock_file = (
+                "/root/.cache/torch_extensions/py311_cpu/segmented_maxsim_cpp/lock"
+            )
+            if os.path.exists(lock_file):
+                os.remove(lock_file)
+        self.ckpt = Checkpoint(
+            name,
+            colbert_config=ColBERTConfig(model_name=name),
+        ).to(self.device)
+        pass
+    def calculate_similarity_scores(self, query_embeddings, document_embeddings):
+        query_embeddings = query_embeddings.to(self.device)
+        document_embeddings = document_embeddings.to(self.device)
+        # Validate dimensions to ensure compatibility
+        if query_embeddings.dim() != 3:
+            raise ValueError(
+                f"Expected query embeddings to have 3 dimensions, but got {query_embeddings.dim()}."
+            )
+        if document_embeddings.dim() != 3:
+            raise ValueError(
+                f"Expected document embeddings to have 3 dimensions, but got {document_embeddings.dim()}."
+            )
+        if query_embeddings.size(0) not in [1, document_embeddings.size(0)]:
+            raise ValueError(
+                "There should be either one query or queries equal to the number of documents."
+            )
+        # Transpose the query embeddings to align for matrix multiplication
+        transposed_query_embeddings = query_embeddings.permute(0, 2, 1)
+        # Compute similarity scores using batch matrix multiplication
+        computed_scores = torch.matmul(document_embeddings, transposed_query_embeddings)
+        # Apply max pooling to extract the highest semantic similarity across each document's sequence
+        maximum_scores = torch.max(computed_scores, dim=1).values
+        # Sum up the maximum scores across features to get the overall document relevance scores
+        final_scores = maximum_scores.sum(dim=1)
+        normalized_scores = torch.softmax(final_scores, dim=0)
+        return normalized_scores.detach().cpu().numpy().astype(np.float32)
+    def predict(self, sentences):
+        query = sentences[0][0]
+        docs = [i[1] for i in sentences]
+        # Embedding the documents
+        embedded_docs = self.ckpt.docFromText(docs, bsize=32)[0]
+        # Embedding the queries
+        embedded_queries = self.ckpt.queryFromText([query], bsize=32)
+        embedded_query = embedded_queries[0]
+        # Calculate retrieval scores for the query against all documents
+        scores = self.calculate_similarity_scores(
+            embedded_query.unsqueeze(0), embedded_docs
+        )
+        return scores

backend/open_webui/apps/retrieval/utils.py ADDED Viewed

	@@ -0,0 +1,532 @@

+import logging
+import os
+import uuid
+from typing import Optional, Union
+import asyncio
+import requests
+from huggingface_hub import snapshot_download
+from langchain.retrievers import ContextualCompressionRetriever, EnsembleRetriever
+from langchain_community.retrievers import BM25Retriever
+from langchain_core.documents import Document
+from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT
+from open_webui.utils.misc import get_last_user_message
+from open_webui.env import SRC_LOG_LEVELS
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["RAG"])
+from typing import Any
+from langchain_core.callbacks import CallbackManagerForRetrieverRun
+from langchain_core.retrievers import BaseRetriever
+class VectorSearchRetriever(BaseRetriever):
+    collection_name: Any
+    embedding_function: Any
+    top_k: int
+    def _get_relevant_documents(
+        self,
+        query: str,
+        *,
+        run_manager: CallbackManagerForRetrieverRun,
+    ) -> list[Document]:
+        result = VECTOR_DB_CLIENT.search(
+            collection_name=self.collection_name,
+            vectors=[self.embedding_function(query)],
+            limit=self.top_k,
+        )
+        ids = result.ids[0]
+        metadatas = result.metadatas[0]
+        documents = result.documents[0]
+        results = []
+        for idx in range(len(ids)):
+            results.append(
+                Document(
+                    metadata=metadatas[idx],
+                    page_content=documents[idx],
+                )
+            )
+        return results
+def query_doc(
+    collection_name: str,
+    query_embedding: list[float],
+    k: int,
+):
+    try:
+        result = VECTOR_DB_CLIENT.search(
+            collection_name=collection_name,
+            vectors=[query_embedding],
+            limit=k,
+        )
+        log.info(f"query_doc:result {result.ids} {result.metadatas}")
+        return result
+    except Exception as e:
+        print(e)
+        raise e
+def query_doc_with_hybrid_search(
+    collection_name: str,
+    query: str,
+    embedding_function,
+    k: int,
+    reranking_function,
+    r: float,
+) -> dict:
+    try:
+        result = VECTOR_DB_CLIENT.get(collection_name=collection_name)
+        bm25_retriever = BM25Retriever.from_texts(
+            texts=result.documents[0],
+            metadatas=result.metadatas[0],
+        )
+        bm25_retriever.k = k
+        vector_search_retriever = VectorSearchRetriever(
+            collection_name=collection_name,
+            embedding_function=embedding_function,
+            top_k=k,
+        )
+        ensemble_retriever = EnsembleRetriever(
+            retrievers=[bm25_retriever, vector_search_retriever], weights=[0.5, 0.5]
+        )
+        compressor = RerankCompressor(
+            embedding_function=embedding_function,
+            top_n=k,
+            reranking_function=reranking_function,
+            r_score=r,
+        )
+        compression_retriever = ContextualCompressionRetriever(
+            base_compressor=compressor, base_retriever=ensemble_retriever
+        )
+        result = compression_retriever.invoke(query)
+        result = {
+            "distances": [[d.metadata.get("score") for d in result]],
+            "documents": [[d.page_content for d in result]],
+            "metadatas": [[d.metadata for d in result]],
+        }
+        log.info(
+            "query_doc_with_hybrid_search:result "
+            + f'{result["metadatas"]} {result["distances"]}'
+        )
+        return result
+    except Exception as e:
+        raise e
+def merge_and_sort_query_results(
+    query_results: list[dict], k: int, reverse: bool = False
+) -> list[dict]:
+    # Initialize lists to store combined data
+    combined_distances = []
+    combined_documents = []
+    combined_metadatas = []
+    for data in query_results:
+        combined_distances.extend(data["distances"][0])
+        combined_documents.extend(data["documents"][0])
+        combined_metadatas.extend(data["metadatas"][0])
+    # Create a list of tuples (distance, document, metadata)
+    combined = list(zip(combined_distances, combined_documents, combined_metadatas))
+    # Sort the list based on distances
+    combined.sort(key=lambda x: x[0], reverse=reverse)
+    # We don't have anything :-(
+    if not combined:
+        sorted_distances = []
+        sorted_documents = []
+        sorted_metadatas = []
+    else:
+        # Unzip the sorted list
+        sorted_distances, sorted_documents, sorted_metadatas = zip(*combined)
+        # Slicing the lists to include only k elements
+        sorted_distances = list(sorted_distances)[:k]
+        sorted_documents = list(sorted_documents)[:k]
+        sorted_metadatas = list(sorted_metadatas)[:k]
+    # Create the output dictionary
+    result = {
+        "distances": [sorted_distances],
+        "documents": [sorted_documents],
+        "metadatas": [sorted_metadatas],
+    }
+    return result
+def query_collection(
+    collection_names: list[str],
+    queries: list[str],
+    embedding_function,
+    k: int,
+) -> dict:
+    results = []
+    for query in queries:
+        query_embedding = embedding_function(query)
+        for collection_name in collection_names:
+            if collection_name:
+                try:
+                    result = query_doc(
+                        collection_name=collection_name,
+                        k=k,
+                        query_embedding=query_embedding,
+                    )
+                    if result is not None:
+                        results.append(result.model_dump())
+                except Exception as e:
+                    log.exception(f"Error when querying the collection: {e}")
+            else:
+                pass
+    return merge_and_sort_query_results(results, k=k)
+def query_collection_with_hybrid_search(
+    collection_names: list[str],
+    queries: list[str],
+    embedding_function,
+    k: int,
+    reranking_function,
+    r: float,
+) -> dict:
+    results = []
+    error = False
+    for collection_name in collection_names:
+        try:
+            for query in queries:
+                result = query_doc_with_hybrid_search(
+                    collection_name=collection_name,
+                    query=query,
+                    embedding_function=embedding_function,
+                    k=k,
+                    reranking_function=reranking_function,
+                    r=r,
+                )
+                results.append(result)
+        except Exception as e:
+            log.exception(
+                "Error when querying the collection with " f"hybrid_search: {e}"
+            )
+            error = True
+    if error:
+        raise Exception(
+            "Hybrid search failed for all collections. Using Non hybrid search as fallback."
+        )
+    return merge_and_sort_query_results(results, k=k, reverse=True)
+def get_embedding_function(
+    embedding_engine,
+    embedding_model,
+    embedding_function,
+    url,
+    key,
+    embedding_batch_size,
+):
+    if embedding_engine == "":
+        return lambda query: embedding_function.encode(query).tolist()
+    elif embedding_engine in ["ollama", "openai"]:
+        func = lambda query: generate_embeddings(
+            engine=embedding_engine,
+            model=embedding_model,
+            text=query,
+            url=url,
+            key=key,
+        )
+        def generate_multiple(query, func):
+            if isinstance(query, list):
+                embeddings = []
+                for i in range(0, len(query), embedding_batch_size):
+                    embeddings.extend(func(query[i : i + embedding_batch_size]))
+                return embeddings
+            else:
+                return func(query)
+        return lambda query: generate_multiple(query, func)
+def get_sources_from_files(
+    files,
+    queries,
+    embedding_function,
+    k,
+    reranking_function,
+    r,
+    hybrid_search,
+):
+    log.debug(f"files: {files} {queries} {embedding_function} {reranking_function}")
+    extracted_collections = []
+    relevant_contexts = []
+    for file in files:
+        if file.get("context") == "full":
+            context = {
+                "documents": [[file.get("file").get("data", {}).get("content")]],
+                "metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]],
+            }
+        else:
+            context = None
+            collection_names = []
+            if file.get("type") == "collection":
+                if file.get("legacy"):
+                    collection_names = file.get("collection_names", [])
+                else:
+                    collection_names.append(file["id"])
+            elif file.get("collection_name"):
+                collection_names.append(file["collection_name"])
+            elif file.get("id"):
+                if file.get("legacy"):
+                    collection_names.append(f"{file['id']}")
+                else:
+                    collection_names.append(f"file-{file['id']}")
+            collection_names = set(collection_names).difference(extracted_collections)
+            if not collection_names:
+                log.debug(f"skipping {file} as it has already been extracted")
+                continue
+            try:
+                context = None
+                if file.get("type") == "text":
+                    context = file["content"]
+                else:
+                    if hybrid_search:
+                        try:
+                            context = query_collection_with_hybrid_search(
+                                collection_names=collection_names,
+                                queries=queries,
+                                embedding_function=embedding_function,
+                                k=k,
+                                reranking_function=reranking_function,
+                                r=r,
+                            )
+                        except Exception as e:
+                            log.debug(
+                                "Error when using hybrid search, using"
+                                " non hybrid search as fallback."
+                            )
+                    if (not hybrid_search) or (context is None):
+                        context = query_collection(
+                            collection_names=collection_names,
+                            queries=queries,
+                            embedding_function=embedding_function,
+                            k=k,
+                        )
+            except Exception as e:
+                log.exception(e)
+            extracted_collections.extend(collection_names)
+        if context:
+            if "data" in file:
+                del file["data"]
+            relevant_contexts.append({**context, "file": file})
+    sources = []
+    for context in relevant_contexts:
+        try:
+            if "documents" in context:
+                if "metadatas" in context:
+                    source = {
+                        "source": context["file"],
+                        "document": context["documents"][0],
+                        "metadata": context["metadatas"][0],
+                    }
+                    if "distances" in context and context["distances"]:
+                        source["distances"] = context["distances"][0]
+                    sources.append(source)
+        except Exception as e:
+            log.exception(e)
+    return sources
+def get_model_path(model: str, update_model: bool = False):
+    # Construct huggingface_hub kwargs with local_files_only to return the snapshot path
+    cache_dir = os.getenv("SENTENCE_TRANSFORMERS_HOME")
+    local_files_only = not update_model
+    snapshot_kwargs = {
+        "cache_dir": cache_dir,
+        "local_files_only": local_files_only,
+    }
+    log.debug(f"model: {model}")
+    log.debug(f"snapshot_kwargs: {snapshot_kwargs}")
+    # Inspiration from upstream sentence_transformers
+    if (
+        os.path.exists(model)
+        or ("\\" in model or model.count("/") > 1)
+        and local_files_only
+    ):
+        # If fully qualified path exists, return input, else set repo_id
+        return model
+    elif "/" not in model:
+        # Set valid repo_id for model short-name
+        model = "sentence-transformers" + "/" + model
+    snapshot_kwargs["repo_id"] = model
+    # Attempt to query the huggingface_hub library to determine the local path and/or to update
+    try:
+        model_repo_path = snapshot_download(**snapshot_kwargs)
+        log.debug(f"model_repo_path: {model_repo_path}")
+        return model_repo_path
+    except Exception as e:
+        log.exception(f"Cannot determine model snapshot path: {e}")
+        return model
+def generate_openai_batch_embeddings(
+    model: str, texts: list[str], url: str = "https://api.openai.com/v1", key: str = ""
+) -> Optional[list[list[float]]]:
+    try:
+        r = requests.post(
+            f"{url}/embeddings",
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {key}",
+            },
+            json={"input": texts, "model": model},
+        )
+        r.raise_for_status()
+        data = r.json()
+        if "data" in data:
+            return [elem["embedding"] for elem in data["data"]]
+        else:
+            raise "Something went wrong :/"
+    except Exception as e:
+        print(e)
+        return None
+def generate_ollama_batch_embeddings(
+    model: str, texts: list[str], url: str, key: str = ""
+) -> Optional[list[list[float]]]:
+    try:
+        r = requests.post(
+            f"{url}/api/embed",
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {key}",
+            },
+            json={"input": texts, "model": model},
+        )
+        r.raise_for_status()
+        data = r.json()
+        if "embeddings" in data:
+            return data["embeddings"]
+        else:
+            raise "Something went wrong :/"
+    except Exception as e:
+        print(e)
+        return None
+def generate_embeddings(engine: str, model: str, text: Union[str, list[str]], **kwargs):
+    url = kwargs.get("url", "")
+    key = kwargs.get("key", "")
+    if engine == "ollama":
+        if isinstance(text, list):
+            embeddings = generate_ollama_batch_embeddings(
+                **{"model": model, "texts": text, "url": url, "key": key}
+            )
+        else:
+            embeddings = generate_ollama_batch_embeddings(
+                **{"model": model, "texts": [text], "url": url, "key": key}
+            )
+        return embeddings[0] if isinstance(text, str) else embeddings
+    elif engine == "openai":
+        if isinstance(text, list):
+            embeddings = generate_openai_batch_embeddings(model, text, url, key)
+        else:
+            embeddings = generate_openai_batch_embeddings(model, [text], url, key)
+        return embeddings[0] if isinstance(text, str) else embeddings
+import operator
+from typing import Optional, Sequence
+from langchain_core.callbacks import Callbacks
+from langchain_core.documents import BaseDocumentCompressor, Document
+class RerankCompressor(BaseDocumentCompressor):
+    embedding_function: Any
+    top_n: int
+    reranking_function: Any
+    r_score: float
+    class Config:
+        extra = "forbid"
+        arbitrary_types_allowed = True
+    def compress_documents(
+        self,
+        documents: Sequence[Document],
+        query: str,
+        callbacks: Optional[Callbacks] = None,
+    ) -> Sequence[Document]:
+        reranking = self.reranking_function is not None
+        if reranking:
+            scores = self.reranking_function.predict(
+                [(query, doc.page_content) for doc in documents]
+            )
+        else:
+            from sentence_transformers import util
+            query_embedding = self.embedding_function(query)
+            document_embedding = self.embedding_function(
+                [doc.page_content for doc in documents]
+            )
+            scores = util.cos_sim(query_embedding, document_embedding)[0]
+        docs_with_scores = list(zip(documents, scores.tolist()))
+        if self.r_score:
+            docs_with_scores = [
+                (d, s) for d, s in docs_with_scores if s >= self.r_score
+            ]
+        result = sorted(docs_with_scores, key=operator.itemgetter(1), reverse=True)
+        final_results = []
+        for doc, doc_score in result[: self.top_n]:
+            metadata = doc.metadata
+            metadata["score"] = doc_score
+            doc = Document(
+                page_content=doc.page_content,
+                metadata=metadata,
+            )
+            final_results.append(doc)
+        return final_results

backend/open_webui/apps/retrieval/vector/connector.py ADDED Viewed

	@@ -0,0 +1,22 @@

+from open_webui.config import VECTOR_DB
+if VECTOR_DB == "milvus":
+    from open_webui.apps.retrieval.vector.dbs.milvus import MilvusClient
+    VECTOR_DB_CLIENT = MilvusClient()
+elif VECTOR_DB == "qdrant":
+    from open_webui.apps.retrieval.vector.dbs.qdrant import QdrantClient
+    VECTOR_DB_CLIENT = QdrantClient()
+elif VECTOR_DB == "opensearch":
+    from open_webui.apps.retrieval.vector.dbs.opensearch import OpenSearchClient
+    VECTOR_DB_CLIENT = OpenSearchClient()
+elif VECTOR_DB == "pgvector":
+    from open_webui.apps.retrieval.vector.dbs.pgvector import PgvectorClient
+    VECTOR_DB_CLIENT = PgvectorClient()
+else:
+    from open_webui.apps.retrieval.vector.dbs.chroma import ChromaClient
+    VECTOR_DB_CLIENT = ChromaClient()

backend/open_webui/apps/retrieval/vector/dbs/chroma.py ADDED Viewed

	@@ -0,0 +1,174 @@

+import chromadb
+from chromadb import Settings
+from chromadb.utils.batch_utils import create_batches
+from typing import Optional
+from open_webui.apps.retrieval.vector.main import VectorItem, SearchResult, GetResult
+from open_webui.config import (
+    CHROMA_DATA_PATH,
+    CHROMA_HTTP_HOST,
+    CHROMA_HTTP_PORT,
+    CHROMA_HTTP_HEADERS,
+    CHROMA_HTTP_SSL,
+    CHROMA_TENANT,
+    CHROMA_DATABASE,
+    CHROMA_CLIENT_AUTH_PROVIDER,
+    CHROMA_CLIENT_AUTH_CREDENTIALS,
+)
+class ChromaClient:
+    def __init__(self):
+        settings_dict = {
+            "allow_reset": True,
+            "anonymized_telemetry": False,
+        }
+        if CHROMA_CLIENT_AUTH_PROVIDER is not None:
+            settings_dict["chroma_client_auth_provider"] = CHROMA_CLIENT_AUTH_PROVIDER
+        if CHROMA_CLIENT_AUTH_CREDENTIALS is not None:
+            settings_dict["chroma_client_auth_credentials"] = (
+                CHROMA_CLIENT_AUTH_CREDENTIALS
+            )
+        if CHROMA_HTTP_HOST != "":
+            self.client = chromadb.HttpClient(
+                host=CHROMA_HTTP_HOST,
+                port=CHROMA_HTTP_PORT,
+                headers=CHROMA_HTTP_HEADERS,
+                ssl=CHROMA_HTTP_SSL,
+                tenant=CHROMA_TENANT,
+                database=CHROMA_DATABASE,
+                settings=Settings(**settings_dict),
+            )
+        else:
+            self.client = chromadb.PersistentClient(
+                path=CHROMA_DATA_PATH,
+                settings=Settings(**settings_dict),
+                tenant=CHROMA_TENANT,
+                database=CHROMA_DATABASE,
+            )
+    def has_collection(self, collection_name: str) -> bool:
+        # Check if the collection exists based on the collection name.
+        collections = self.client.list_collections()
+        return collection_name in [collection.name for collection in collections]
+    def delete_collection(self, collection_name: str):
+        # Delete the collection based on the collection name.
+        return self.client.delete_collection(name=collection_name)
+    def search(
+        self, collection_name: str, vectors: list[list[float | int]], limit: int
+    ) -> Optional[SearchResult]:
+        # Search for the nearest neighbor items based on the vectors and return 'limit' number of results.
+        try:
+            collection = self.client.get_collection(name=collection_name)
+            if collection:
+                result = collection.query(
+                    query_embeddings=vectors,
+                    n_results=limit,
+                )
+                return SearchResult(
+                    **{
+                        "ids": result["ids"],
+                        "distances": result["distances"],
+                        "documents": result["documents"],
+                        "metadatas": result["metadatas"],
+                    }
+                )
+            return None
+        except Exception as e:
+            return None
+    def query(
+        self, collection_name: str, filter: dict, limit: Optional[int] = None
+    ) -> Optional[GetResult]:
+        # Query the items from the collection based on the filter.
+        try:
+            collection = self.client.get_collection(name=collection_name)
+            if collection:
+                result = collection.get(
+                    where=filter,
+                    limit=limit,
+                )
+                return GetResult(
+                    **{
+                        "ids": [result["ids"]],
+                        "documents": [result["documents"]],
+                        "metadatas": [result["metadatas"]],
+                    }
+                )
+            return None
+        except Exception as e:
+            print(e)
+            return None
+    def get(self, collection_name: str) -> Optional[GetResult]:
+        # Get all the items in the collection.
+        collection = self.client.get_collection(name=collection_name)
+        if collection:
+            result = collection.get()
+            return GetResult(
+                **{
+                    "ids": [result["ids"]],
+                    "documents": [result["documents"]],
+                    "metadatas": [result["metadatas"]],
+                }
+            )
+        return None
+    def insert(self, collection_name: str, items: list[VectorItem]):
+        # Insert the items into the collection, if the collection does not exist, it will be created.
+        collection = self.client.get_or_create_collection(
+            name=collection_name, metadata={"hnsw:space": "cosine"}
+        )
+        ids = [item["id"] for item in items]
+        documents = [item["text"] for item in items]
+        embeddings = [item["vector"] for item in items]
+        metadatas = [item["metadata"] for item in items]
+        for batch in create_batches(
+            api=self.client,
+            documents=documents,
+            embeddings=embeddings,
+            ids=ids,
+            metadatas=metadatas,
+        ):
+            collection.add(*batch)
+    def upsert(self, collection_name: str, items: list[VectorItem]):
+        # Update the items in the collection, if the items are not present, insert them. If the collection does not exist, it will be created.
+        collection = self.client.get_or_create_collection(
+            name=collection_name, metadata={"hnsw:space": "cosine"}
+        )
+        ids = [item["id"] for item in items]
+        documents = [item["text"] for item in items]
+        embeddings = [item["vector"] for item in items]
+        metadatas = [item["metadata"] for item in items]
+        collection.upsert(
+            ids=ids, documents=documents, embeddings=embeddings, metadatas=metadatas
+        )
+    def delete(
+        self,
+        collection_name: str,
+        ids: Optional[list[str]] = None,
+        filter: Optional[dict] = None,
+    ):
+        # Delete the items from the collection based on the ids.
+        collection = self.client.get_collection(name=collection_name)
+        if collection:
+            if ids:
+                collection.delete(ids=ids)
+            elif filter:
+                collection.delete(where=filter)
+    def reset(self):
+        # Resets the database. This will delete all collections and item entries.
+        return self.client.reset()

backend/open_webui/apps/retrieval/vector/dbs/milvus.py ADDED Viewed

	@@ -0,0 +1,286 @@

+from pymilvus import MilvusClient as Client
+from pymilvus import FieldSchema, DataType
+import json
+from typing import Optional
+from open_webui.apps.retrieval.vector.main import VectorItem, SearchResult, GetResult
+from open_webui.config import (
+    MILVUS_URI,
+)
+class MilvusClient:
+    def __init__(self):
+        self.collection_prefix = "open_webui"
+        self.client = Client(uri=MILVUS_URI)
+    def _result_to_get_result(self, result) -> GetResult:
+        ids = []
+        documents = []
+        metadatas = []
+        for match in result:
+            _ids = []
+            _documents = []
+            _metadatas = []
+            for item in match:
+                _ids.append(item.get("id"))
+                _documents.append(item.get("data", {}).get("text"))
+                _metadatas.append(item.get("metadata"))
+            ids.append(_ids)
+            documents.append(_documents)
+            metadatas.append(_metadatas)
+        return GetResult(
+            **{
+                "ids": ids,
+                "documents": documents,
+                "metadatas": metadatas,
+            }
+        )
+    def _result_to_search_result(self, result) -> SearchResult:
+        ids = []
+        distances = []
+        documents = []
+        metadatas = []
+        for match in result:
+            _ids = []
+            _distances = []
+            _documents = []
+            _metadatas = []
+            for item in match:
+                _ids.append(item.get("id"))
+                _distances.append(item.get("distance"))
+                _documents.append(item.get("entity", {}).get("data", {}).get("text"))
+                _metadatas.append(item.get("entity", {}).get("metadata"))
+            ids.append(_ids)
+            distances.append(_distances)
+            documents.append(_documents)
+            metadatas.append(_metadatas)
+        return SearchResult(
+            **{
+                "ids": ids,
+                "distances": distances,
+                "documents": documents,
+                "metadatas": metadatas,
+            }
+        )
+    def _create_collection(self, collection_name: str, dimension: int):
+        schema = self.client.create_schema(
+            auto_id=False,
+            enable_dynamic_field=True,
+        )
+        schema.add_field(
+            field_name="id",
+            datatype=DataType.VARCHAR,
+            is_primary=True,
+            max_length=65535,
+        )
+        schema.add_field(
+            field_name="vector",
+            datatype=DataType.FLOAT_VECTOR,
+            dim=dimension,
+            description="vector",
+        )
+        schema.add_field(field_name="data", datatype=DataType.JSON, description="data")
+        schema.add_field(
+            field_name="metadata", datatype=DataType.JSON, description="metadata"
+        )
+        index_params = self.client.prepare_index_params()
+        index_params.add_index(
+            field_name="vector",
+            index_type="HNSW",
+            metric_type="COSINE",
+            params={"M": 16, "efConstruction": 100},
+        )
+        self.client.create_collection(
+            collection_name=f"{self.collection_prefix}_{collection_name}",
+            schema=schema,
+            index_params=index_params,
+        )
+    def has_collection(self, collection_name: str) -> bool:
+        # Check if the collection exists based on the collection name.
+        collection_name = collection_name.replace("-", "_")
+        return self.client.has_collection(
+            collection_name=f"{self.collection_prefix}_{collection_name}"
+        )
+    def delete_collection(self, collection_name: str):
+        # Delete the collection based on the collection name.
+        collection_name = collection_name.replace("-", "_")
+        return self.client.drop_collection(
+            collection_name=f"{self.collection_prefix}_{collection_name}"
+        )
+    def search(
+        self, collection_name: str, vectors: list[list[float | int]], limit: int
+    ) -> Optional[SearchResult]:
+        # Search for the nearest neighbor items based on the vectors and return 'limit' number of results.
+        collection_name = collection_name.replace("-", "_")
+        result = self.client.search(
+            collection_name=f"{self.collection_prefix}_{collection_name}",
+            data=vectors,
+            limit=limit,
+            output_fields=["data", "metadata"],
+        )
+        return self._result_to_search_result(result)
+    def query(self, collection_name: str, filter: dict, limit: Optional[int] = None):
+        # Construct the filter string for querying
+        collection_name = collection_name.replace("-", "_")
+        if not self.has_collection(collection_name):
+            return None
+        filter_string = " && ".join(
+            [
+                f'metadata["{key}"] == {json.dumps(value)}'
+                for key, value in filter.items()
+            ]
+        )
+        max_limit = 16383  # The maximum number of records per request
+        all_results = []
+        if limit is None:
+            limit = float("inf")  # Use infinity as a placeholder for no limit
+        # Initialize offset and remaining to handle pagination
+        offset = 0
+        remaining = limit
+        try:
+            # Loop until there are no more items to fetch or the desired limit is reached
+            while remaining > 0:
+                print("remaining", remaining)
+                current_fetch = min(
+                    max_limit, remaining
+                )  # Determine how many items to fetch in this iteration
+                results = self.client.query(
+                    collection_name=f"{self.collection_prefix}_{collection_name}",
+                    filter=filter_string,
+                    output_fields=["*"],
+                    limit=current_fetch,
+                    offset=offset,
+                )
+                if not results:
+                    break
+                all_results.extend(results)
+                results_count = len(results)
+                remaining -= (
+                    results_count  # Decrease remaining by the number of items fetched
+                )
+                offset += results_count
+                # Break the loop if the results returned are less than the requested fetch count
+                if results_count < current_fetch:
+                    break
+            print(all_results)
+            return self._result_to_get_result([all_results])
+        except Exception as e:
+            print(e)
+            return None
+    def get(self, collection_name: str) -> Optional[GetResult]:
+        # Get all the items in the collection.
+        collection_name = collection_name.replace("-", "_")
+        result = self.client.query(
+            collection_name=f"{self.collection_prefix}_{collection_name}",
+            filter='id != ""',
+        )
+        return self._result_to_get_result([result])
+    def insert(self, collection_name: str, items: list[VectorItem]):
+        # Insert the items into the collection, if the collection does not exist, it will be created.
+        collection_name = collection_name.replace("-", "_")
+        if not self.client.has_collection(
+            collection_name=f"{self.collection_prefix}_{collection_name}"
+        ):
+            self._create_collection(
+                collection_name=collection_name, dimension=len(items[0]["vector"])
+            )
+        return self.client.insert(
+            collection_name=f"{self.collection_prefix}_{collection_name}",
+            data=[
+                {
+                    "id": item["id"],
+                    "vector": item["vector"],
+                    "data": {"text": item["text"]},
+                    "metadata": item["metadata"],
+                }
+                for item in items
+            ],
+        )
+    def upsert(self, collection_name: str, items: list[VectorItem]):
+        # Update the items in the collection, if the items are not present, insert them. If the collection does not exist, it will be created.
+        collection_name = collection_name.replace("-", "_")
+        if not self.client.has_collection(
+            collection_name=f"{self.collection_prefix}_{collection_name}"
+        ):
+            self._create_collection(
+                collection_name=collection_name, dimension=len(items[0]["vector"])
+            )
+        return self.client.upsert(
+            collection_name=f"{self.collection_prefix}_{collection_name}",
+            data=[
+                {
+                    "id": item["id"],
+                    "vector": item["vector"],
+                    "data": {"text": item["text"]},
+                    "metadata": item["metadata"],
+                }
+                for item in items
+            ],
+        )
+    def delete(
+        self,
+        collection_name: str,
+        ids: Optional[list[str]] = None,
+        filter: Optional[dict] = None,
+    ):
+        # Delete the items from the collection based on the ids.
+        collection_name = collection_name.replace("-", "_")
+        if ids:
+            return self.client.delete(
+                collection_name=f"{self.collection_prefix}_{collection_name}",
+                ids=ids,
+            )
+        elif filter:
+            # Convert the filter dictionary to a string using JSON_CONTAINS.
+            filter_string = " && ".join(
+                [
+                    f'metadata["{key}"] == {json.dumps(value)}'
+                    for key, value in filter.items()
+                ]
+            )
+            return self.client.delete(
+                collection_name=f"{self.collection_prefix}_{collection_name}",
+                filter=filter_string,
+            )
+    def reset(self):
+        # Resets the database. This will delete all collections and item entries.
+        collection_names = self.client.list_collections()
+        for collection_name in collection_names:
+            if collection_name.startswith(self.collection_prefix):
+                self.client.drop_collection(collection_name=collection_name)