Spaces:

intoxication
/

wbrule

Configuration error

App Files Files Community

intoxication commited on Sep 3, 2023

Commit

3fe47db

•

1 Parent(s): 4d43e49

Upload 12 files

Browse files

Files changed (13) hide show

.gitattributes +1 -0
Dockerfile +14 -0
LICENSE +202 -0
README.md +139 -10
Slides - How to Build a QA Application With Haystack.pdf +3 -0
docker-compose.yml +44 -0
haystack-api/pipelines_biobert.haystack-pipeline.yml +75 -0
pyproject.toml +73 -0
ui/__about__.py +10 -0
ui/__init__.py +0 -0
ui/eval_labels_example.csv +10 -0
ui/utils.py +123 -0
ui/webapp.py +294 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Slides[[:space:]]-[[:space:]]How[[:space:]]to[[:space:]]Build[[:space:]]a[[:space:]]QA[[:space:]]Application[[:space:]]With[[:space:]]Haystack.pdf filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

	@@ -0,0 +1,14 @@

+FROM python:3.10-slim
+# copy code
+COPY . /ui
+# install as a package
+RUN pip install --upgrade pip && \
+    pip install /ui/
+WORKDIR /ui
+EXPOSE 8501
+# cmd for running the Streamlit UI
+CMD ["python", "-m", "streamlit", "run", "ui/webapp.py"]

LICENSE ADDED Viewed

	@@ -0,0 +1,202 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright 2021 deepset GmbH
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

README.md CHANGED Viewed

@@ -1,10 +1,139 @@
----
-title: Wbrule
-emoji: 🐠
-colorFrom: red
-colorTo: yellow
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+## Question Answering Application for Healthcare
+This is a streamlit-based NLP application powering a question answering demo on healthcare data. It's easy to change and extend and can be used to try out Haystack's capabilities.
+A video presentation of this demo is available on [YouTube](https://www.youtube.com/watch?v=pOnkGdOvYfo). To get started with Haystack please visit the [README](https://github.com/deepset-ai/haystack/tree/main#key-components) or check out our [tutorials](https://haystack.deepset.ai/tutorials/first-qa-system).
+## Usage
+The easiest way to run the application is through [Docker compose](https://docs.docker.com/compose/).
+From this folder, just run:
+```sh
+docker compose up -d
+```
+Docker will start three containers:
+- `elasticsearch`, running an Elasticsearch instance with some data pre-loaded.
+- `haystack-api`, running a pre-loaded Haystack pipeline behind a RESTful API.
+- `ui`, running the streamlit application showing the UI and querying Haystack under the hood.
+Once all the containers are up and running, you can open the user interface pointing your
+browser to [http://localhost:8501](http://localhost:8501).
+## Screencast
+https://user-images.githubusercontent.com/4181769/231965471-48d581a2-e1aa-4316-b3a4-990d9c86800e.mov
+## Evaluation Mode
+The evaluation mode leverages the feedback REST API endpoint of Haystack. The user has the options
+"Correct answer", "Wrong answer, but correct passage" and "Wrong answer and wrong passage" to give
+feedback.
+In order to use the UI in evaluation mode, you need an Elasticsearch instance with pre-indexed files
+and the Haystack REST API. You can set the environment up via docker images. For Elasticsearch, you
+can check out our [documentation](https://haystack.deepset.ai/usage/document-store#initialisation)
+and for setting up the REST API this [link](https://github.com/deepset-ai/haystack/blob/main/README.md#7-rest-api).
+To enter the evaluation mode, select the checkbox "Evaluation mode" in the sidebar. The UI will load
+the predefined questions from the file
+[`eval_labels_example.csv`](https://raw.githubusercontent.com/deepset-ai/haystack/main/ui/ui/eval_labels_example.csv).
+The file needs to be prefilled with your data. This way, the user will get a random question from the
+set and can give their feedback with the buttons below the questions. To load a new question, click
+the button "Get random question".
+The file just needs to have two columns separated by a semicolon. You can add more columns but the UI
+will ignore them. Every line represents a question-answer pair. The column with the questions needs
+to be named “Question Text” and the answer column “Answer” so that they can be loaded correctly.
+Currently, the easiest way to create the file is manually by adding question answer pairs.
+The feedback can be exported with the API endpoint `export-doc-qa-feedback`. To learn more about
+finetuning a model with user feedback, please check out our
+[docs](https://haystack.deepset.ai/usage/domain-adaptation#user-feedback).
+## Query different data
+If you want to use this application to query a different corpus, the easiest way is to build the
+Elasticsearch image, load your own text data and then use the same Compose file to run all the
+three containers needed. This will require [Docker](https://docs.docker.com/get-docker/) to be
+properly installed on your machine.
+### Running your custom build
+Once done, modify the `elasticsearch` section in the `docker-compose.yml` file, changing this line:
+```yaml
+ image: "julianrisch/elasticsearch-healthcare"
+```
+to:
+```yaml
+ image: "my-docker-acct/elasticsearch-custom"
+```
+Finally, run the compose file as usual:
+```sh
+docker-compose up
+```
+## Development
+If you want to change the streamlit application, you need to set up your Python environment first.
+From a virtual environment, run:
+```sh
+pip install -e .
+```
+The app requires the Haystack RESTful API to be ready and accepting connections at `http://localhost:8000`. You can use Docker compose to start only the required containers:
+```sh
+docker-compose up elasticsearch haystack-api
+```
+At this point you should be able to make changes and run the streamlit application with:
+```
+streamlit run ui/webapp.py
+```
+## Using GPUs with Docker
+Assuming you have [nvidia drivers installed](https://developer.nvidia.com/cuda-downloads) on your machine, you can configure docker to use the GPU for the Haystack API container to speed it up.
+First, configure the nvidia repository as described here: https://nvidia.github.io/nvidia-container-runtime/. For example:
+```sh
+curl -s -L https://nvidia.github.io/nvidia-container-runtime/gpgkey | \
+  sudo apt-key add -
+distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
+curl -s -L https://nvidia.github.io/nvidia-container-runtime/$distribution/nvidia-container-runtime.list | \
+  sudo tee /etc/apt/sources.list.d/nvidia-container-runtime.list
+sudo apt-get update
+```
+Then, install nvidia-container-runtime as described here: https://docs.docker.com/config/containers/resource_constraints/#access-an-nvidia-gpu.
+For example:
+```sh
+sudo apt-get install nvidia-container-runtime
+```
+Restart the Docker daemon (or simply the machine).
+Finally, you can change the docker compose file `healthcare/docker-compose.yml` so that a docker image prepared for usage with GPUs is used and one GPU is reserved for the Haystack API container:
+```yaml
+  haystack-api:
+    image: "deepset/haystack:gpu-v1.14.0"
+    ports:
+      - 8000:8000
+    restart: on-failure
+    volumes:
+      - ./haystack-api:/home/node/app
+    environment:
+      - DOCUMENTSTORE_PARAMS_HOST=elasticsearch
+      - PIPELINE_YAML_PATH=/home/node/app/pipelines_biobert.haystack-pipeline.yml
+    depends_on:
+      elasticsearch:
+        condition: service_healthy
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+```

Slides - How to Build a QA Application With Haystack.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e280926d06c88738dc87546feaa66973c35cd7689a520da208bbcc66217bb014
+size 4073318

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,44 @@

+version: "3"
+services:
+  elasticsearch:
+    image: "julianrisch/elasticsearch-healthcare"
+    ports:
+      - 9200:9200
+    restart: on-failure
+    # Uncomment the healthcheck section on Apple M1, as on M1 elasticsearch might need longer to start
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9200/_cat/health"]
+      interval: 10s
+      timeout: 1s
+      retries: 30
+      start_period: "30s"
+  haystack-api:
+    image: "deepset/haystack:cpu-v1.14.0"
+    ports:
+      - 8000:8000
+    restart: on-failure
+    volumes:
+      - ./haystack-api:/home/node/app
+    environment:
+      - DOCUMENTSTORE_PARAMS_HOST=elasticsearch
+      - PIPELINE_YAML_PATH=/home/node/app/pipelines_biobert.haystack-pipeline.yml
+    depends_on:
+      elasticsearch:
+        condition: service_healthy
+  ui:
+    image: "julianrisch/demo-healthcare"
+    ports:
+      - 8501:8501
+    restart: on-failure
+    environment:
+      - API_ENDPOINT=http://haystack-api:8000
+      # The values for the following variables will be read from the host, if present.
+      # They can also be temporarily set for docker-compose, for example:
+      # $ DISABLE_FILE_UPLOAD=1 DEFAULT_DOCS_FROM_RETRIEVER=5 docker-compose up
+      - DEFAULT_QUESTION_AT_STARTUP
+      - DEFAULT_DOCS_FROM_RETRIEVER
+      - DEFAULT_NUMBER_OF_ANSWERS
+    command: "/bin/bash -c 'sleep 15 && python -m streamlit run ui/webapp.py'"

haystack-api/pipelines_biobert.haystack-pipeline.yml ADDED Viewed

	@@ -0,0 +1,75 @@

+version: ignore
+components:
+  - name: DocumentStore
+    type: ElasticsearchDocumentStore
+    params:
+      host: localhost
+  - name: Retriever # Selects the most relevant documents from the document store and passes them on to the Reader
+    type: EmbeddingRetriever # Uses a Transformer model to encode the document and the query
+    params:
+      document_store: DocumentStore
+      embedding_model: sentence-transformers/multi-qa-mpnet-base-dot-v1 # multi-qa-MiniLM-L6-dot-v1
+      embed_meta_fields:
+        - filename
+      top_k: 10 # The number of results to return
+  - name: BM25
+    type: BM25Retriever
+    params:
+      document_store: DocumentStore
+      top_k: 10
+  - name: Joiner
+    type: JoinDocuments
+    params:
+      join_mode: reciprocal_rank_fusion
+  - name: Reader # The component that actually fetches answers from among the 20 documents returned by retriever
+    type: FARMReader # Transformer-based reader, specializes in extractive QA
+    params:
+      model_name_or_path: dmis-lab/biobert-large-cased-v1.1-squad # dmis-lab/biobert-base-cased-v1.1-squad
+      context_window_size: 700 # The size of the window around the answer span
+  - name: FileTypeClassifier # Routes files based on their extension to appropriate converters, by default txt, pdf, md, docx, html
+    type: FileTypeClassifier
+  - name: TextConverter # Converts files into documents
+    type: TextConverter
+  - name: PDFConverter # Converts PDFs into documents
+    type: PDFToTextConverter
+  - name: Preprocessor # Splits documents into smaller ones and cleans them up
+    type: PreProcessor
+    params:
+      # With a vector-based retriever, it's good to split your documents into smaller ones
+      split_by: word # The unit by which you want to split the documents
+      split_length: 250 # The max number of words in a document
+      split_overlap: 20 # Enables the sliding window approach
+      split_respect_sentence_boundary: True # Retains complete sentences in split documents
+      language: en # Used by NLTK to best detect the sentence boundaries for that language
+# Here you define how the nodes are organized in the pipelines
+# For each node, specify its input
+pipelines:
+  - name: query
+    nodes:
+      - name: Retriever
+        inputs: [Query]
+      - name: BM25
+        inputs: [Query]
+      - name: Joiner
+        inputs: [Retriever, BM25]
+      - name: Reader
+        inputs: [Joiner]
+  - name: indexing
+    nodes:
+    # Depending on the file type, we use a Text or PDF converter
+      - name: FileTypeClassifier
+        inputs: [File]
+      - name: TextConverter
+        inputs: [FileTypeClassifier.output_1] # Ensures this converter receives TXT files
+      - name: PDFConverter
+        inputs: [FileTypeClassifier.output_2] # Ensures this converter receives PDFs
+      - name: Preprocessor
+        inputs: [TextConverter, PDFConverter]
+      - name: Retriever
+        inputs: [Preprocessor]
+      - name: DocumentStore
+        inputs: [Retriever]

pyproject.toml ADDED Viewed

	@@ -0,0 +1,73 @@

+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "ui"
+description = 'Minimal UI for Haystack (https://github.com/deepset-ai/haystack)'
+readme = "README.md"
+requires-python = ">=3.7"
+license = "Apache-2.0"
+keywords = []
+authors = [
+  { name = "deepset.ai", email = "malte.pietsch@deepset.ai" },
+]
+classifiers = [
+  "Development Status :: 5 - Production/Stable",
+  "Intended Audience :: Science/Research",
+  "Topic :: Scientific/Engineering :: Artificial Intelligence",
+  "Operating System :: OS Independent",
+  "Programming Language :: Python",
+  "Programming Language :: Python :: 3.7",
+  "Programming Language :: Python :: 3.8",
+  "Programming Language :: Python :: 3.9",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: Implementation :: CPython",
+]
+dependencies = [
+    "streamlit >= 1.9.0, < 2",
+    "st-annotated-text >= 2.0.0, < 3",
+    "markdown >= 3.3.4, < 4"
+]
+dynamic = ["version"]
+[project.urls]
+Documentation = "https://github.com/deepset-ai/haystack/tree/main/ui#readme"
+Issues = "https://github.com/deepset-ai/haystack/issues"
+Source = "https://github.com/deepset-ai/haystack/tree/main/ui"
+[tool.hatch.version]
+path = "ui/__about__.py"
+[tool.hatch.build.targets.sdist]
+[tool.hatch.build.targets.wheel]
+[tool.hatch.envs.default]
+dependencies = [
+  "pytest",
+  "pytest-cov",
+]
+[tool.hatch.envs.default.scripts]
+cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=ui --cov=tests"
+no-cov = "cov --no-cov"
+[[tool.hatch.envs.test.matrix]]
+python = ["37", "38", "39", "310"]
+[tool.coverage.run]
+branch = true
+parallel = true
+omit = [
+  "ui/__about__.py",
+]
+[tool.coverage.report]
+exclude_lines = [
+  "no cov",
+  "if __name__ == .__main__.:",
+  "if TYPE_CHECKING:",
+]
+[tool.black]
+line-length = 120
+skip_magic_trailing_comma = true  # For compatibility with pydoc>=4.6, check if still needed.

ui/__about__.py ADDED Viewed

	@@ -0,0 +1,10 @@

+import logging
+from pathlib import Path
+__version__ = "0.0.0"
+try:
+    # read_text() opens and closes the file itself, so no handle is left open; strip() drops the
+    # trailing newline a text file usually ends with. An empty file keeps the "0.0.0" fallback.
+    __version__ = (Path(__file__).parent.parent / "VERSION.txt").read_text(encoding="utf-8").strip() or __version__
+except Exception:
+    # Best effort: a missing or unreadable VERSION.txt is logged and the fallback version stays in place.
+    logging.exception("No VERSION.txt found!")

ui/__init__.py ADDED Viewed

File without changes

ui/eval_labels_example.csv ADDED Viewed

	@@ -0,0 +1,10 @@

+"Question Text";"Answer"
+"What are treatments for oesophageal cancer?";"radical surgery"
+"What are symptoms of an infusion reaction?";""
+"What reduces inflammation in cancer patients?";"physical exercise"
+"What are the symptoms of cancer related fatigue?";"diminished energy, increased need to rest"
+"How can pain be treated in cancer patients?";"using opioid combination therapies and carefully dosed adjuvants"
+"What side-effects can occur if pain is treated with opioids?";
+"What symptoms may occur when using Ipilimumab?";"Pruritus, maculopapular rash, cough, shortness of breath, chills, rigors, facial ﬂushing, chest, abdominal or back pain."
+"How should a patient with an anaphylactic reaction be handled?"; "an observation period"
+"Should stem-cell transplantation be offered to young adults?";"In the younger population, consolidation with autologous stem cell transplantation (ASCT) in patients achieving a CR has been shown to improve long-term outcomes"

ui/utils.py ADDED Viewed

	@@ -0,0 +1,123 @@

+# pylint: disable=missing-timeout
+from typing import List, Dict, Any, Tuple, Optional
+import os
+import logging
+from time import sleep
+import requests
+import streamlit as st
+# Base URL of the Haystack REST API; can be overridden through the API_ENDPOINT environment variable.
+API_ENDPOINT = os.getenv("API_ENDPOINT", "http://localhost:8000")
+# Path segments appended to API_ENDPOINT to build the URL of each REST call in this module.
+STATUS = "initialized"
+HS_VERSION = "hs_version"
+DOC_REQUEST = "query"
+DOC_FEEDBACK = "feedback"
+DOC_UPLOAD = "file-upload"
+def haystack_is_ready():
+    """
+    Check whether the Haystack REST API answers on its status endpoint.
+    Used to show the "Haystack is loading..." message
+
+    :return: True if a GET on the status endpoint answers with an HTTP status below 400,
+             False if it answers with a status of 400 or above, or if the request fails or times out.
+    """
+    url = f"{API_ENDPOINT}/{STATUS}"
+    try:
+        # Bound the wait so an unresponsive server cannot block the UI indefinitely;
+        # a timeout raises and is handled like any other failed probe.
+        if requests.get(url, timeout=5).status_code < 400:
+            return True
+    except Exception as e:
+        logging.exception(e)
+        sleep(1)  # To avoid spamming a non-existing endpoint at startup
+    return False
+def haystack_version():
+    """
+    Ask the REST API for its version information and return the "hs_version" field of the JSON reply
+    """
+    reply = requests.get(f"{API_ENDPOINT}/{HS_VERSION}", timeout=0.1)
+    payload = reply.json()
+    return payload["hs_version"]
+def query(query, filters=None, top_k_reader=5, top_k_retriever=5) -> Tuple[List[Dict[str, Any]], Dict[str, str]]:
+    """
+    Send a query to the REST API and parse the answer.
+    Returns both a ready-to-use representation of the results and the raw JSON.
+
+    :param query: the question text, sent as the "query" field of the request.
+    :param filters: value sent as the "filters" request parameter; None (the default) sends an empty dict.
+    :param top_k_reader: "top_k" value sent for the "Reader" node.
+    :param top_k_retriever: "top_k" value sent for the "Retriever" node.
+    :return: a tuple (results, response): one dict per returned answer, and the decoded JSON body.
+    :raises Exception: if the API answers with a status >= 400 other than 503, or if the body contains "errors".
+    """
+    # Use a fresh dict per call instead of a mutable default argument shared between calls.
+    if filters is None:
+        filters = {}
+    url = f"{API_ENDPOINT}/{DOC_REQUEST}"
+    params = {"filters": filters, "Retriever": {"top_k": top_k_retriever}, "Reader": {"top_k": top_k_reader}}
+    req = {"query": query, "params": params}
+    response_raw = requests.post(url, json=req)
+    # A 503 is not rejected here; its body is still decoded and checked for "errors" below.
+    if response_raw.status_code >= 400 and response_raw.status_code != 503:
+        raise Exception(f"{vars(response_raw)}")
+    response = response_raw.json()
+    if "errors" in response:
+        raise Exception(", ".join(response["errors"]))
+    # Format response
+    results = []
+    for answer in response["answers"]:
+        relevance = round(answer["score"] * 100, 2)
+        if not answer.get("answer", None):
+            # No answer text: keep a placeholder entry that only carries the score and the raw answer.
+            results.append(
+                {"context": None, "answer": None, "document": None, "relevance": relevance, "_raw": answer}
+            )
+            continue
+        results.append(
+            {
+                "context": "..." + answer["context"] + "...",
+                "answer": answer.get("answer", None),
+                "source": answer["meta"]["name"],
+                "relevance": relevance,
+                # First returned document whose id is listed in the answer's document_ids.
+                "document": [doc for doc in response["documents"] if doc["id"] in answer["document_ids"]][0],
+                "offset_start_in_doc": answer["offsets_in_document"][0]["start"],
+                "_raw": answer,
+            }
+        )
+    return results, response
+def send_feedback(query, answer_obj, is_correct_answer, is_correct_document, document) -> None:
+    """
+    Post a feedback entry (label) for an answer to the REST API.
+    Raises ValueError if the API responds with a status code of 400 or above.
+    """
+    payload = {
+        "query": query,
+        "document": document,
+        "is_correct_answer": is_correct_answer,
+        "is_correct_document": is_correct_document,
+        "origin": "user-feedback",
+        "answer": answer_obj,
+    }
+    reply = requests.post(f"{API_ENDPOINT}/{DOC_FEEDBACK}", json=payload)
+    if reply.status_code < 400:
+        return
+    raise ValueError(f"An error was returned [code {reply.status_code}]: {reply.json()}")
+def upload_doc(file):
+    """
+    Post a file to the REST API's file-upload endpoint and return the decoded JSON reply
+    """
+    endpoint = f"{API_ENDPOINT}/{DOC_UPLOAD}"
+    reply = requests.post(endpoint, files=[("files", file)])
+    return reply.json()
+def get_backlink(result) -> Tuple[Optional[str], Optional[str]]:
+    """
+    Return the (url, title) pair stored in the metadata of the result's document.
+    Returns (None, None) unless the document and its "meta" entry are non-empty dicts
+    and both "url" and "title" hold truthy values.
+    """
+    nothing = (None, None)
+    document = result.get("document", None)
+    if not document or not isinstance(document, dict):
+        return nothing
+    meta = document.get("meta", None)
+    if not meta or not isinstance(meta, dict):
+        return nothing
+    url = meta.get("url", None)
+    title = meta.get("title", None)
+    if url and title:
+        return url, title
+    return nothing

ui/webapp.py ADDED Viewed

	@@ -0,0 +1,294 @@

+import os
+import sys
+import logging
+from pathlib import Path
+from json import JSONDecodeError
+import pandas as pd
+import streamlit as st
+from annotated_text import annotation
+from markdown import markdown
+from ui.utils import haystack_is_ready, query, send_feedback, upload_doc, haystack_version, get_backlink
+# Adjust to a question that you would like users to see in the search bar when they load the UI:
+DEFAULT_QUESTION_AT_STARTUP = os.getenv("DEFAULT_QUESTION_AT_STARTUP", "What are the symptoms of cancer related fatigue?")
+DEFAULT_ANSWER_AT_STARTUP = os.getenv("DEFAULT_ANSWER_AT_STARTUP", "diminished energy, increased need to rest")
+# Sliders
+# The environment values are parsed with int(), so a non-numeric value raises ValueError at import time.
+DEFAULT_DOCS_FROM_RETRIEVER = int(os.getenv("DEFAULT_DOCS_FROM_RETRIEVER", "3"))
+DEFAULT_NUMBER_OF_ANSWERS = int(os.getenv("DEFAULT_NUMBER_OF_ANSWERS", "3"))
+# Labels for the evaluation
+# Path is read from the EVAL_FILE environment variable; defaults to the example CSV next to this module.
+EVAL_LABELS = os.getenv("EVAL_FILE", str(Path(__file__).parent / "eval_labels_example.csv"))
+# Whether the file upload should be enabled or not
+# NOTE: bool() of a string is True for any non-empty value, so setting DISABLE_FILE_UPLOAD
+# to "0" or "false" also disables the upload; leave the variable unset or empty to keep it enabled.
+DISABLE_FILE_UPLOAD = bool(os.getenv("DISABLE_FILE_UPLOAD"))
def set_state_if_absent(key, value):
    """
    Store `value` under `key` in the Streamlit session state, unless the key is already set.
    """
    if key in st.session_state:
        # Keep whatever is already stored for this key.
        return
    st.session_state[key] = value
def main():
    """
    Render the demo page.

    In order, the page is built from: the persistent session state, the title,
    the sidebar (sliders, evaluation/debug checkboxes, optional file upload,
    footer), the search bar with its "Run" and "Random question" buttons, and
    finally the results of the last query, with feedback buttons when the
    evaluation mode is enabled.
    """
    st.set_page_config(page_title="Haystack Demo", page_icon="https://haystack.deepset.ai/img/HaystackIcon.png")

    # Persistent state
    set_state_if_absent("question", DEFAULT_QUESTION_AT_STARTUP)
    set_state_if_absent("answer", DEFAULT_ANSWER_AT_STARTUP)
    set_state_if_absent("results", None)
    set_state_if_absent("raw_json", None)
    set_state_if_absent("random_question_requested", False)

    # Small callback to reset the interface in case the text of the question changes
    def reset_results(*args):
        st.session_state.answer = None
        st.session_state.results = None
        st.session_state.raw_json = None

    # Title
    st.write("# Healthcare Demo")
    st.markdown(
        """
Ask a question and see if Haystack can find the correct answer to your query!
*Note: do not use keywords, but full-fledged questions.* The demo is not optimized to deal with keyword queries and might misunderstand you.
""",
        unsafe_allow_html=True,
    )

    # Sidebar
    st.sidebar.header("Options")
    top_k_reader = st.sidebar.slider(
        "Max. number of answers",
        min_value=1,
        max_value=10,
        value=DEFAULT_NUMBER_OF_ANSWERS,
        step=1,
        on_change=reset_results,
    )
    top_k_retriever = st.sidebar.slider(
        "Max. number of documents from retriever",
        min_value=1,
        max_value=10,
        value=DEFAULT_DOCS_FROM_RETRIEVER,
        step=1,
        on_change=reset_results,
    )
    eval_mode = st.sidebar.checkbox("Evaluation mode")
    debug = st.sidebar.checkbox("Show debug info")

    # File upload block
    # NOTE(review): every file still listed in the uploader is sent again each time
    # this function runs - confirm whether the API tolerates repeated uploads.
    if not DISABLE_FILE_UPLOAD:
        st.sidebar.write("## File Upload:")
        data_files = st.sidebar.file_uploader(
            "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
        )
        for data_file in data_files:
            # Upload file
            if not data_file:
                continue
            try:
                raw_json = upload_doc(data_file)
            except Exception as e:
                # The message below points the user to the logs, so the failure must be logged.
                logging.exception(e)
                st.sidebar.write(str(data_file.name) + " &nbsp;&nbsp; ❌ ")
                st.sidebar.write("_This file could not be parsed, see the logs for more information._")
            else:
                st.sidebar.write(str(data_file.name) + " &nbsp;&nbsp; ✅ ")
                if debug:
                    # Keep the header next to the JSON it describes (both in the sidebar)
                    st.sidebar.subheader("REST API JSON response")
                    st.sidebar.write(raw_json)

    st.sidebar.markdown(
        f"""
    <style>
        a {{
            text-decoration: none;
        }}
        .haystack-footer {{
            text-align: center;
        }}
        .haystack-footer h4 {{
            margin: 0.1rem;
            padding:0;
        }}
        footer {{
            opacity: 0;
        }}
    </style>
    <div class="haystack-footer">
        <hr />
        <h4>Built with <a href="https://haystack.deepset.ai/">Haystack</a> 1.14.0</h4>
        <p>Get it on <a href="https://github.com/deepset-ai/haystack/">GitHub</a> &nbsp;&nbsp; - &nbsp;&nbsp; Read the <a href="https://docs.haystack.deepset.ai/docs">Docs</a></p>
    </div>
    """,
        unsafe_allow_html=True,
    )

    # Load csv into pandas dataframe
    try:
        df = pd.read_csv(EVAL_LABELS, sep=";")
    except Exception as e:
        # Any loading problem ends up here, so keep the real cause in the logs
        logging.exception(e)
        st.error(
            "The eval file was not found. Please check the demo's [README](https://github.com/deepset-ai/haystack/tree/main/ui/README.md) for more information."
        )
        sys.exit(
            f"The eval file was not found under `{EVAL_LABELS}`. Please check the README (https://github.com/deepset-ai/haystack/tree/main/ui/README.md) for more information."
        )

    # Search bar
    question = st.text_input(
        value=st.session_state.question,
        max_chars=100,
        on_change=reset_results,
        label="question",
        label_visibility="hidden",
    )
    col1, col2 = st.columns(2)
    col1.markdown("<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)
    col2.markdown("<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)

    # Run button
    run_pressed = col1.button("Run")

    # Get next random question from the CSV
    if col2.button("Random question"):
        reset_results()
        # Avoid picking the same question twice (the change is not visible on the UI).
        # Filtering first, instead of re-sampling until the question differs, cannot
        # loop forever when every row holds the current question.
        candidates = df[df["Question Text"] != st.session_state.question]
        if candidates.empty:
            candidates = df
        new_row = candidates.sample(1)
        st.session_state.question = new_row["Question Text"].values[0]
        st.session_state.answer = new_row["Answer"].values[0]
        st.session_state.random_question_requested = True
        # Re-runs the script setting the random question as the textbox value
        # Unfortunately necessary as the Random Question button is _below_ the textbox
        if hasattr(st, "scriptrunner"):
            raise st.scriptrunner.script_runner.RerunException(
                st.scriptrunner.script_requests.RerunData(widget_states=None)
            )
        raise st.runtime.scriptrunner.script_runner.RerunException(
            st.runtime.scriptrunner.script_requests.RerunData(widget_states=None)
        )
    st.session_state.random_question_requested = False

    run_query = (
        run_pressed or question != st.session_state.question
    ) and not st.session_state.random_question_requested

    # Check the connection
    with st.spinner("⌛️ &nbsp;&nbsp; Haystack is starting..."):
        if not haystack_is_ready():
            st.error("🚫 &nbsp;&nbsp; Connection Error. Is Haystack running?")
            run_query = False
            reset_results()

    # Get results for query
    if run_query and question:
        reset_results()
        st.session_state.question = question

        with st.spinner(
            "🧠 &nbsp;&nbsp; Performing neural search on documents... \n "
            "Do you want to optimize speed or accuracy? \n"
            "Check out the docs: https://haystack.deepset.ai/usage/optimization "
        ):
            try:
                st.session_state.results, st.session_state.raw_json = query(
                    question, top_k_reader=top_k_reader, top_k_retriever=top_k_retriever
                )
            except JSONDecodeError:
                st.error("👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?")
                return
            except Exception as e:
                logging.exception(e)
                if "The server is busy processing requests" in str(e) or "503" in str(e):
                    st.error("🧑‍🌾 &nbsp;&nbsp; All our workers are busy! Try again later.")
                else:
                    st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
                return

    if st.session_state.results:
        # Show the gold answer if we use a question of the given set
        if eval_mode and st.session_state.answer:
            st.write("## Correct answer:")
            st.write(st.session_state.answer)

        st.write("## Results:")

        for count, result in enumerate(st.session_state.results):
            if result["answer"]:
                answer, context = result["answer"], result["context"]
                highlighted = str(annotation(answer, "ANSWER", "#8ef"))
                start_idx = context.find(answer)
                if start_idx >= 0:
                    end_idx = start_idx + len(answer)
                    rendered = context[:start_idx] + highlighted + context[end_idx:]
                else:
                    # find() returns -1 when the answer is not a verbatim part of the
                    # context; slicing with -1 would garble the passage, so show the
                    # highlighted answer in front of the untouched context instead.
                    rendered = f"{highlighted} {context}"
                # Hack due to this bug: https://github.com/streamlit/streamlit/issues/3190
                st.write(markdown(rendered), unsafe_allow_html=True)

                url, title = get_backlink(result)
                if url and title:
                    source = f"[{title}]({url})"
                else:
                    source = f"{result['source']}"
                st.markdown(f"**Relevance:** {result['relevance']} -  **Source:** {source}")
            else:
                st.info(
                    "🤔 &nbsp;&nbsp; Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
                )
                st.write("**Relevance:** ", result["relevance"])

            if eval_mode and result["answer"]:
                # One entry per feedback button:
                # (label, key suffix, tooltip, is_correct_answer, is_correct_document)
                feedback_choices = (
                    ("👍", "1", "Correct answer", True, True),
                    ("👎", "2", "Wrong answer and wrong passage", False, False),
                    ("👎👍", "3", "Wrong answer, but correct passage", False, True),
                )
                # Define columns for buttons (the last, wide column is only a spacer)
                *button_cols, _ = st.columns([1, 1, 1, 6])
                verdict = None
                for button_col, (label, key_suffix, help_text, answer_ok, document_ok) in zip(
                    button_cols, feedback_choices
                ):
                    if button_col.button(label, key=f"{result['context']}{count}{key_suffix}", help=help_text):
                        verdict = (answer_ok, document_ok)

                if verdict is not None:
                    is_correct_answer, is_correct_document = verdict
                    try:
                        send_feedback(
                            query=question,
                            answer_obj=result["_raw"],
                            is_correct_answer=is_correct_answer,
                            is_correct_document=is_correct_document,
                            document=result["document"],
                        )
                        st.success("✨ &nbsp;&nbsp; Thanks for your feedback! &nbsp;&nbsp; ✨")
                    except Exception as e:
                        logging.exception(e)
                        st.error("🐞 &nbsp;&nbsp; An error occurred while submitting your feedback!")

            st.write("___")

        if debug:
            st.subheader("REST API JSON response")
            st.write(st.session_state.raw_json)


# The page is rendered by running this module as a script (`streamlit run ui/webapp.py`).
main()