jeevan committed on
Commit
eb58fc5
1 Parent(s): e33920b

refactoring for azure and langsmith

Browse files
Files changed (2) hide show
  1. app.py +33 -9
  2. docker-compose.yml +38 -0
app.py CHANGED
@@ -21,9 +21,11 @@ GPT_MODEL = "gpt-4o-mini"
21
  # Used for Langsmith
22
  unique_id = uuid4().hex[0:8]
23
  os.environ["LANGCHAIN_TRACING_V2"] = "true"
24
- os.environ["LANGCHAIN_PROJECT"] = f"LangSmith LCEL RAG - {unique_id}"
 
25
 
26
  is_azure = False if os.environ.get("AZURE_DEPLOYMENT") is None else True
 
27
 
28
  # Utility functions
29
  def save_file(file: AskFileResponse,file_ext:str,is_azure:bool) -> str:
@@ -33,7 +35,7 @@ def save_file(file: AskFileResponse,file_ext:str,is_azure:bool) -> str:
33
  file_ext = ".txt"
34
  else:
35
  raise ValueError(f"Unknown file type: {file_ext}")
36
- dir = "/tmp" if is_azure else None
37
  with tempfile.NamedTemporaryFile(
38
  mode="wb", delete=False, suffix=file_ext,dir=dir
39
  ) as temp_file:
@@ -42,6 +44,28 @@ def save_file(file: AskFileResponse,file_ext:str,is_azure:bool) -> str:
42
  return temp_file_path
43
 
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  # Prepare the components that will form the chain
46
 
47
  ## Step 1: Create a prompt template
@@ -83,20 +107,20 @@ async def on_chat_start():
83
  ).send()
84
 
85
  ## Load file and split into chunks
86
- msg = cl.Message(content=f"Processing `{files[0].name}`...")
87
- await msg.send()
88
 
89
  current_file_path = save_file(files[0], files[0].type,is_azure)
90
  loader_splitter = TextLoaderAndSplitterWrapper(ChunkingStrategy.RECURSIVE_CHARACTER_CHAR_SPLITTER, current_file_path)
91
  documents = loader_splitter.load_documents()
92
 
 
 
93
  ## Vectorising the documents
94
- qdrant_vectorstore = Qdrant.from_documents(
95
- documents=documents,
96
- embedding=embedding_model,
97
- location=":memory:"
98
- )
99
  qdrant_retriever = qdrant_vectorstore.as_retriever()
 
100
 
101
  # create the chain on new chat session
102
  retrieval_augmented_qa_chain = (
 
21
  # Used for Langsmith
22
  unique_id = uuid4().hex[0:8]
23
  os.environ["LANGCHAIN_TRACING_V2"] = "true"
24
+ if os.environ.get("LANGCHAIN_PROJECT") is None:
25
+ os.environ["LANGCHAIN_PROJECT"] = f"LangSmith LCEL RAG - {unique_id}"
26
 
27
  is_azure = False if os.environ.get("AZURE_DEPLOYMENT") is None else True
28
+ is_azure_qdrant_inmem = True if os.environ.get("AZURE_QDRANT_INMEM") else False
29
 
30
  # Utility functions
31
  def save_file(file: AskFileResponse,file_ext:str,is_azure:bool) -> str:
 
35
  file_ext = ".txt"
36
  else:
37
  raise ValueError(f"Unknown file type: {file_ext}")
38
+ dir = "/tmp" if is_azure_qdrant_inmem else None
39
  with tempfile.NamedTemporaryFile(
40
  mode="wb", delete=False, suffix=file_ext,dir=dir
41
  ) as temp_file:
 
44
  return temp_file_path
45
 
46
 
47
+ def setup_vectorstore(documents: List[str], embedding_model: OpenAIEmbeddings,is_azure:bool) -> Qdrant:
48
+ if is_azure:
49
+ if is_azure_qdrant_inmem:
50
+ qdrant_vectorstore = Qdrant.from_documents(
51
+ documents=documents,
52
+ embedding=embedding_model,
53
+ location=":memory:"
54
+ )
55
+ else:
56
+ qdrant_vectorstore = Qdrant.from_documents(
57
+ documents=documents,
58
+ embedding=embedding_model,
59
+ url="http://qdrant:6333", # Docker compose setup
60
+ )
61
+ else:
62
+ qdrant_vectorstore = Qdrant.from_documents(
63
+ documents=documents,
64
+ embedding=embedding_model,
65
+ location=":memory:"
66
+ )
67
+ return qdrant_vectorstore
68
+
69
  # Prepare the components that will form the chain
70
 
71
  ## Step 1: Create a prompt template
 
107
  ).send()
108
 
109
  ## Load file and split into chunks
110
+ await cl.Message(content=f"Processing `{files[0].name}`...").send()
 
111
 
112
  current_file_path = save_file(files[0], files[0].type,is_azure)
113
  loader_splitter = TextLoaderAndSplitterWrapper(ChunkingStrategy.RECURSIVE_CHARACTER_CHAR_SPLITTER, current_file_path)
114
  documents = loader_splitter.load_documents()
115
 
116
+ await cl.Message(content=" Data Chunked...").send()
117
+
118
  ## Vectorising the documents
119
+
120
+ qdrant_vectorstore = setup_vectorstore(documents, embedding_model,is_azure)
121
+
 
 
122
  qdrant_retriever = qdrant_vectorstore.as_retriever()
123
+ await cl.Message(content=" Created Vector store").send()
124
 
125
  # create the chain on new chat session
126
  retrieval_augmented_qa_chain = (
docker-compose.yml ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ # Your Python Application Service
5
+ app:
6
+ build:
7
+ context: .
8
+ dockerfile: Dockerfile # Assuming your Dockerfile is named Dockerfile
9
+ container_name: app
10
+ user: "user" # Matching the user created in your Dockerfile
11
+ ports:
12
+ - "7860:7860" # Expose your application's port
13
+ environment:
14
+ - HOME=/home/user
15
+ - PATH=/home/user/.local/bin:$PATH
16
+ - AZURE_DEPLOYMENT=true
17
+ - AZURE_QDRANT_INMEM=true # False means use Qdrant service from the network
18
+ depends_on:
19
+ - qdrant # Ensure Qdrant starts before this service
20
+ volumes:
21
+ - .:/home/user/app # Mount current directory to container
22
+
23
+ # Qdrant Service
24
+ qdrant:
25
+ image: qdrant/qdrant:latest
26
+ restart: always
27
+ container_name: qdrant
28
+ ports:
29
+ - "6333:6333"
30
+ - "6334:6334"
31
+ expose:
32
+ - "6333"
33
+ - "6334"
34
+ - "6335"
35
+ # volumes:
36
+ # - ./qdrant_data:/qdrant/storage # Persist Qdrant data
37
+ # - ./qdrant_config/production.yaml:/qdrant/config/production.yaml # Mount configuration file
38
+