"""Gradio chat UI for querying Puglia funding-call ("bandi") documents.

Retrieval runs against two Pinecone indexes through LlamaIndex hybrid
retrievers; answers stream either from a context chat engine
(conversational mode) or from a one-shot retriever query engine.
"""

import re
import sys
import threading
import time

import gradio as gr
import spaces
import torch
from pinecone import Pinecone

from llama_index.core import (
    Settings,
    StorageContext,
    VectorStoreIndex,
    get_response_synthesizer,
)
from llama_index.core.chat_engine import ContextChatEngine
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.vector_stores.pinecone import PineconeVectorStore

# Project-local helpers: setGPU, setLLM, setPromptTemplate, html_escape.
from utils import *

head = """
"""

css = """
#chatbot {
    margin-top: 1%;
    width: 75%;
    position: relative;
    height: 70%;
}
#textBox {
    width: 75%;
    position: relative;
}
.wrapper.svelte-nab2ao p {
    font-size: 14px;
}
#btnClear {
    width: 75%;
}
#buttonChat {
    width: 50%;
    position: relative;
}
#colonnaElementi {
    position: absolute;
    left: 77%;
    top: 10%;
    bottom: 10%; /* Adjust this value as necessary */
    width: 10%;
    height: auto; /* Let the height be determined by the top and bottom properties */
    max-height: 80%; /* Do not exceed 80% of the parent container's height */
    overflow-y: auto; /* Allow scrolling if content overflows vertically */
    overflow-x: hidden; /* Hide horizontal overflow */
    word-wrap: break-word; /* Break words to fit within the width */
    box-sizing: border-box; /* Include padding and border in the total size */
}
#responseMode {
    width: 5%;
}
.message.user.svelte-gutj6d.message-bubble-border {
    padding: 5px;
}
.message.bot.svelte-gutj6d.message-bubble-border {
    padding: 5px;
}
.icon {
    cursor: pointer;
}
/* Style for the hidden text */
.hidden-text {
    display: none;
}
.wrap.svelte-1sk0pyu {
    width: 12%;
}
"""


def main():
    user_message = ""
    current_response_mode = "compact"
    current_chat_mode = "STANDARD"
    current_collection = ""
    file_path = ""
    num_responses = 0
    retriever = None
    token_count_bandi = 0
    token_count_bandi_sistema_puglia = 0
    chat_engine_bandi = None
    chat_engine_bandi_sistema_puglia = None
    memory_bandi = None
    memory_bandi_sistema_puglia = None
    stream_response = None
    divDocumenti = None

    setGPU()
    llm = setLLM()
    Settings.llm = llm
    Settings.embed_model = "local:google-bert/bert-base-multilingual-cased"
    embed_model = Settings.embed_model
    text_qa_template, refine_template = setPromptTemplate()

    # NOTE: the key is hard-coded in the original; an environment variable
    # would be safer.
    pinecone_api_key = "7e412663-a2dc-44a6-ab57-25dd0bdce226"

    def build_retriever(index_name, alpha):
        """Connect to a Pinecone index and return a hybrid retriever.

        `alpha` weights dense against sparse scores (1.0 = dense only).
        """
        pc = Pinecone(api_key=pinecone_api_key)
        pinecone_index = pc.Index(index_name)
        vector_store = PineconeVectorStore(
            pinecone_index=pinecone_index,
            add_sparse_vector=True,
        )
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        # Load the index from the vectors already stored in Pinecone.
        index = VectorStoreIndex.from_vector_store(
            vector_store, storage_context=storage_context
        )
        return VectorIndexRetriever(
            index=index,
            similarity_top_k=3,
            vector_store_query_mode="hybrid",
            embed_model=embed_model,
            alpha=alpha,
        )

    def select_initial_collection():
        # `nonlocal` (rather than `global`, as in the original) so these
        # names rebind main()'s state instead of module-level globals.
        nonlocal current_collection, retriever
        retriever = build_retriever("indexbandisistemapuglia", alpha=0.5)
        current_collection = "BANDI_SISTEMA_PUGLIA"
        return "collection set"

    def select_collection(evt: gr.SelectData):
        nonlocal current_collection, retriever
        nonlocal token_count_bandi, token_count_bandi_sistema_puglia

        selected_collection = evt.value
        if selected_collection != current_collection:
            # Switching collection: both branches of the original performed
            # the same resets, so they are hoisted above the branch.
            chat_engine_bandi.reset()
            chat_engine_bandi_sistema_puglia.reset()
            memory_bandi_sistema_puglia.reset()
            memory_bandi.reset()
            token_count_bandi = 0
            token_count_bandi_sistema_puglia = 0
            if selected_collection == "BANDI_SISTEMA_PUGLIA":
                retriever = build_retriever("indexbandisistemapuglia", alpha=0.5)
            else:
                retriever = build_retriever("indexbandi", alpha=0.4)
            current_collection = selected_collection
        # The original return value was truncated in the source; an empty
        # string simply clears the documents panel.
        return ""
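    # ------------------------------------------------------------------
    # Everything between select_collection() and the streaming branches
    # below was lost when this file was extracted. What follows is a
    # minimal reconstruction, inferred from the surviving event wiring
    # (msg.submit(user, ...).then(bot, chatbot, [chatbot, divDocumenti]))
    # and from the names the streaming code uses. The token limits and the
    # chat-mode test are assumptions, not the original values.
    # ------------------------------------------------------------------
    select_initial_collection()

    memory_bandi = ChatMemoryBuffer.from_defaults(token_limit=3000)
    memory_bandi_sistema_puglia = ChatMemoryBuffer.from_defaults(token_limit=3000)
    # Note: both engines are built against the initial retriever; switching
    # collection later rebinds `retriever` but not the engines.
    chat_engine_bandi = ContextChatEngine.from_defaults(
        retriever=retriever, memory=memory_bandi, llm=llm
    )
    chat_engine_bandi_sistema_puglia = ContextChatEngine.from_defaults(
        retriever=retriever, memory=memory_bandi_sistema_puglia, llm=llm
    )

    def sources_to_html(source_nodes):
        # The markup here is reconstructed: the original tags were stripped
        # during extraction. The 'icon' and 'hidden-text' classes come from
        # the CSS above (clickable magnifier, collapsed node text).
        html = ""
        for node in source_nodes:
            html += (
                "<div><b>" + node.metadata['nome_bando'] + "</b><br>"
                + "<span class='icon'>Nodo 🔍</span>"
                + "<span class='hidden-text'>" + html_escape(node.text) + "</span></div>"
            )
        return html

    def user(message, history):
        nonlocal user_message
        user_message = message
        return "", history + [[message, None]]

    def bot(history):
        nonlocal token_count_bandi, token_count_bandi_sistema_puglia
        nonlocal stream_response
        if current_chat_mode != "STANDARD":
            # Conversational mode: answer through the chat engine of the
            # currently selected collection.
            if current_collection == "BANDI":
                # Assumed symmetric with the sistema_puglia branch below:
                # cap the rough token count at 1000, then reset memory
                # and engine.
                if token_count_bandi >= 1000:
                    print("RESET!!!")
                    token_count_bandi = 0
                    memory_bandi.reset()
                    chat_engine_bandi.reset()
                print(user_message)
                stream_response = chat_engine_bandi.stream_chat(user_message)
                print("response via chat engine")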
"+node.metadata['nome_bando']+"
Nodo 🔍
"+node.text+"
" history[-1][1] = "" for character in stream_response.response_gen: tokens = character.split(" ") num_tokens = len(tokens) token_count_bandi = token_count_bandi + num_tokens print(token_count_bandi) history[-1][1] += html_escape(str(character)) time.sleep(0.05) yield history, "" else: if(token_count_bandi_sistema_puglia >= 1000): print("RESET!!!") token_count_bandi_sistema_puglia = 0 memory_bandi_sistema_puglia.reset() chat_engine_bandi_sistema_puglia.reset() print(chat_engine_bandi_sistema_puglia.chat_history) print(memory_bandi_sistema_puglia) stream_response = None print(user_message) stream_response = chat_engine_bandi_sistema_puglia.stream_chat(user_message) print("risposta con chat engine") responseHTML = "" for i, node in enumerate(stream_response.source_nodes): responseHTML += "
"+node.metadata['nome_bando']+"
Nodo 🔍
"+node.text+"
" history[-1][1] = "" for character in stream_response.response_gen: tokens = character.split(" ") num_tokens = len(tokens) token_count_bandi_sistema_puglia = token_count_bandi_sistema_puglia + num_tokens print(token_count_bandi_sistema_puglia) history[-1][1] += html_escape(str(character)) time.sleep(0.05) yield history,responseHTML else: if(str(current_response_mode)=="tree_summarize"): # define response synthesizer response_synthesizer = get_response_synthesizer(streaming=True,response_mode="tree_summarize",text_qa_template=text_qa_template) query_engine = None query_engine = RetrieverQueryEngine(retriever=retriever, response_synthesizer=response_synthesizer) stream_response = None print(user_message) stream_response = query_engine.query(user_message) print("risposta con query engine") responseHTML = "" for i, node in enumerate(stream_response.source_nodes): responseHTML += ""+node.metadata['nome_bando']+"
Nodo 🔍
"+node.text+"
" history[-1][1] = "" for character in stream_response.response_gen: history[-1][1] += html_escape(str(character)) time.sleep(0.05) yield history, responseHTML else: # define response synthesizer response_synthesizer = get_response_synthesizer(streaming=True,response_mode="compact",text_qa_template=text_qa_template, refine_template=refine_template) query_engine = None query_engine = RetrieverQueryEngine(retriever=retriever, response_synthesizer=response_synthesizer) stream_response = None print(user_message) stream_response = query_engine.query(user_message) print("risposta con query engine") responseHTML = "" for i, node in enumerate(stream_response.source_nodes): responseHTML += ""+node.metadata['nome_bando']+"
Nodo 🔍
"+node.text+"
" history[-1][1] = "" for character in stream_response.response_gen: history[-1][1] += html_escape(str(character)) time.sleep(0.05) yield history, responseHTML torch.cuda.empty_cache() torch.cuda.reset_max_memory_allocated() torch.cuda.reset_max_memory_cached() msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then( bot, chatbot, [chatbot, divDocumenti] ) demo.queue() demo.launch(debug=True, share=True) if __name__ == "__main__": main()