Spaces:

Bikas0
/

Contract-Conditions-Extraction-and-Verification

Sleeping

App Files Community

Bikas0 committed on Jul 6

Commit

f4f51e9

•

1 Parent(s): 1705208

Update the code

Browse files

Files changed (2) hide show

app.py +425 -78
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,12 +1,385 @@
 import streamlit as st
 import os
 import openai
 import json
 import pandas as pd
 from docx import Document
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from dotenv import load_dotenv
 import time
 # Load the OpenAI API key from environment variables
 load_dotenv()
@@ -44,78 +417,57 @@ def extract_terms_from_contract(contract_text):
         "Provide the extracted terms in JSON format."
     )
-    retries = 2
-    wait_time = 1
-    for i in range(retries):
-        try:
-            response = openai.ChatCompletion.create(
-                model="gpt-4",
-                messages=[
-                    {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
-                    {"role": "user", "content": prompt},
-                ],
-                max_tokens=4096,
-                n=1,
-                stop=None,
-                temperature=0.1,
-            )
-            return response.choices[0].message["content"]
-        except openai.error.RateLimitError:
-            if i < retries - 1:
-                st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
-                time.sleep(wait_time)
-                wait_time *= 2  # Exponential backoff
-            else:
-                st.error("Rate limit exceeded. Please try again later.")
-                return None
-def analyze_task_compliance(task_description, cost_estimate, contract_terms):
     prompt = (
         "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
         "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
         "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
         "If there are violations, list the reasons for each violation.\n\n"
-        f"Contract terms:\n{json.dumps(contract_terms, indent=4)}\n\n"
         f"Task description:\n{task_description}\n"
         f"Cost estimate:\n{cost_estimate}\n\n"
         "Provide the compliance analysis in a clear JSON format."
     )
-    retries = 5
-    wait_time = 1
-    for i in range(retries):
-        try:
-            response = openai.ChatCompletion.create(
-                model="gpt-4",
-                messages=[
-                    {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
-                    {"role": "user", "content": prompt},
-                ],
-                max_tokens=4096,
-                n=1,
-                stop=None,
-                temperature=0.1,
-                stream=True,
-            )
-            compliance_analysis = ""
-            for chunk in response:
-                chunk_text = chunk['choices'][0]['delta'].get('content', '')
-                compliance_analysis += chunk_text
-                st.write(chunk_text)
-                st.json(chunk_text)
-            return json.loads(compliance_analysis)
-        except openai.error.RateLimitError:
-            if i < retries - 1:
-                st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
-                time.sleep(wait_time)
-                wait_time *= 2  # Exponential backoff
-            else:
-                st.error("Rate limit exceeded. Please try again later.")
-                return None
 def main():
     st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)
     # File upload buttons one after another
@@ -133,7 +485,7 @@ def main():
         # Extract contract text and terms
         contract_text = extract_text_from_docx(docx_file)
         extracted_terms_json = extract_terms_from_contract(contract_text)
         if extracted_terms_json is None:
             return
@@ -142,7 +494,7 @@ def main():
         except json.JSONDecodeError as e:
             st.error(f"JSON decoding error: {e}")
             return
         # Read task descriptions and cost estimates from XLSX or CSV
         if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
             tasks_df = pd.read_excel(data_file)
@@ -150,23 +502,16 @@ def main():
             tasks_df = pd.read_csv(data_file)
         compliance_results = []
-        futures = []
-        # Use ThreadPoolExecutor to analyze tasks concurrently
-        with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
-            for _, row in tasks_df.iterrows():
-                task_description = row['Task Description']
-                cost_estimate = row['Amount']
-                futures.append(executor.submit(analyze_task_compliance, task_description, cost_estimate, contract_terms))
-            for future in as_completed(futures):
-                try:
-                    result = future.result()
-                    if result is not None:
-                        compliance_results.append(result)
-                except Exception as e:
-                    st.error(f"An error occurred: {e}")
         col1, col2 = st.columns(2)
         with col1:
@@ -193,6 +538,8 @@ def main():
                 file_name="compliance_results.json",
                 mime="application/json"
             )
 if __name__ == "__main__":
     main()

+# import streamlit as st
+# import os
+# import openai
+# import json
+# import pandas as pd
+# from docx import Document
+# from concurrent.futures import ThreadPoolExecutor, as_completed
+# from dotenv import load_dotenv
+# import time
+# # Load the OpenAI API key from environment variables
+# load_dotenv()
+# api_key = os.getenv("OPENAI_API_KEY")
+# openai.api_key = api_key
+# # Streamlit app layout
+# st.set_page_config(layout="wide")
+# # Add custom CSS for center alignment
+# st.markdown("""
+#     <style>
+#     .centered-title {
+#         text-align: center;
+#         font-size: 2.5em;
+#         margin-top: 0;
+#     }
+#     </style>
+#     """, unsafe_allow_html=True)
+# def extract_text_from_docx(docx_path):
+#     doc = Document(docx_path)
+#     return "\n".join([para.text for para in doc.paragraphs])
+# def extract_terms_from_contract(contract_text):
+#     prompt = (
+#         "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
+#         "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
+#         "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
+#         "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
+#         "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
+#         "contains multiple terms, list them all.\n\n"
+#         "Contract text:\n"
+#         f"{contract_text}\n\n"
+#         "Provide the extracted terms in JSON format."
+#     )
+#     retries = 2
+#     wait_time = 1
+#     for i in range(retries):
+#         try:
+#             response = openai.ChatCompletion.create(
+#                 model="gpt-4",
+#                 messages=[
+#                     {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
+#                     {"role": "user", "content": prompt},
+#                 ],
+#                 max_tokens=4096,
+#                 n=1,
+#                 stop=None,
+#                 temperature=0.1,
+#             )
+#             return response.choices[0].message["content"]
+#         except openai.error.RateLimitError:
+#             if i < retries - 1:
+#                 st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
+#                 time.sleep(wait_time)
+#                 wait_time *= 2  # Exponential backoff
+#             else:
+#                 st.error("Rate limit exceeded. Please try again later.")
+#                 return None
+# def analyze_task_compliance(task_description, cost_estimate, contract_terms):
+#     prompt = (
+#         "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
+#         "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
+#         "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
+#         "If there are violations, list the reasons for each violation.\n\n"
+#         f"Contract terms:\n{json.dumps(contract_terms, indent=4)}\n\n"
+#         f"Task description:\n{task_description}\n"
+#         f"Cost estimate:\n{cost_estimate}\n\n"
+#         "Provide the compliance analysis in a clear JSON format."
+#     )
+#     retries = 5
+#     wait_time = 1
+#     for i in range(retries):
+#         try:
+#             response = openai.ChatCompletion.create(
+#                 model="gpt-4",
+#                 messages=[
+#                     {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
+#                     {"role": "user", "content": prompt},
+#                 ],
+#                 max_tokens=4096,
+#                 n=1,
+#                 stop=None,
+#                 temperature=0.1,
+#                 stream=True,
+#             )
+#             compliance_analysis = ""
+#             for chunk in response:
+#                 chunk_text = chunk['choices'][0]['delta'].get('content', '')
+#                 compliance_analysis += chunk_text
+#                 st.write(chunk_text)
+#                 st.json(chunk_text)
+#             return json.loads(compliance_analysis)
+#         except openai.error.RateLimitError:
+#             if i < retries - 1:
+#                 st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
+#                 time.sleep(wait_time)
+#                 wait_time *= 2  # Exponential backoff
+#             else:
+#                 st.error("Rate limit exceeded. Please try again later.")
+#                 return None
+# def main():
+#     st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)
+#     # File upload buttons one after another
+#     st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
+#     st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
+#     submit_button = st.sidebar.button("Submit")
+#     docx_file = st.session_state.get("docx_file")
+#     data_file = st.session_state.get("data_file")
+#     if submit_button and docx_file and data_file:
+#         # Clear previous information
+#         st.session_state.clear()
+#         # Extract contract text and terms
+#         contract_text = extract_text_from_docx(docx_file)
+#         extracted_terms_json = extract_terms_from_contract(contract_text)
+#         if extracted_terms_json is None:
+#             return
+#         try:
+#             contract_terms = json.loads(extracted_terms_json)
+#         except json.JSONDecodeError as e:
+#             st.error(f"JSON decoding error: {e}")
+#             return
+#         # Read task descriptions and cost estimates from XLSX or CSV
+#         if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
+#             tasks_df = pd.read_excel(data_file)
+#         else:
+#             tasks_df = pd.read_csv(data_file)
+#         compliance_results = []
+#         futures = []
+#         # Use ThreadPoolExecutor to analyze tasks concurrently
+#         with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
+#             for _, row in tasks_df.iterrows():
+#                 task_description = row['Task Description']
+#                 cost_estimate = row['Amount']
+#                 futures.append(executor.submit(analyze_task_compliance, task_description, cost_estimate, contract_terms))
+#             for future in as_completed(futures):
+#                 try:
+#                     result = future.result()
+#                     if result is not None:
+#                         compliance_results.append(result)
+#                 except Exception as e:
+#                     st.error(f"An error occurred: {e}")
+#         col1, col2 = st.columns(2)
+#         with col1:
+#             st.write("Extracted Contract Terms:")
+#             st.json(contract_terms)
+#             # Download button for contract terms
+#             st.download_button(
+#                 label="Download Contract Terms",
+#                 data=json.dumps(contract_terms, indent=4),
+#                 file_name="contract_terms.json",
+#                 mime="application/json"
+#             )
+#         with col2:
+#             st.write("Compliance Results:")
+#             st.json(compliance_results)
+#             # Download button for compliance results
+#             compliance_results_json = json.dumps(compliance_results, indent=4)
+#             st.download_button(
+#                 label="Download Compliance Results",
+#                 data=compliance_results_json,
+#                 file_name="compliance_results.json",
+#                 mime="application/json"
+#             )
+# if __name__ == "__main__":
+#     main()
+# import streamlit as st
+# import os
+# import openai
+# import json
+# import pandas as pd
+# from docx import Document
+# from dotenv import load_dotenv
+# import time
+# # Load the OpenAI API key from environment variables
+# load_dotenv()
+# api_key = os.getenv("OPENAI_API_KEY")
+# openai.api_key = api_key
+# # Streamlit app layout
+# st.set_page_config(layout="wide")
+# # Add custom CSS for center alignment
+# st.markdown("""
+#     <style>
+#     .centered-title {
+#         text-align: center;
+#         font-size: 2.5em;
+#         margin-top: 0;
+#     }
+#     </style>
+#     """, unsafe_allow_html=True)
+# def extract_text_from_docx(docx_path):
+#     doc = Document(docx_path)
+#     return "\n".join([para.text for para in doc.paragraphs])
+# def extract_terms_from_contract(contract_text):
+#     prompt = (
+#         "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
+#         "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
+#         "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
+#         "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
+#         "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
+#         "contains multiple terms, list them all.\n\n"
+#         "Contract text:\n"
+#         f"{contract_text}\n\n"
+#         "Provide the extracted terms in JSON format."
+#     )
+#     try:
+#         response = openai.ChatCompletion.create(
+#             model="gpt-4",
+#             messages=[
+#                 {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
+#                 {"role": "user", "content": prompt},
+#             ],
+#             max_tokens=4096,
+#             n=1,
+#             stop=None,
+#             temperature=0.1,
+#         )
+#         return response.choices[0].message["content"]
+#     except openai.error.OpenAIError as e:
+#         st.error(f"Error extracting terms from contract: {e}")
+#         return None
+# def analyze_task_compliance(task_description, cost_estimate, contract_text):
+#     prompt = (
+#         "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
+#         "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
+#         "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
+#         "If there are violations, list the reasons for each violation.\n\n"
+#         f"Contract terms:\n{contract_text}\n\n"
+#         f"Task description:\n{task_description}\n"
+#         f"Cost estimate:\n{cost_estimate}\n\n"
+#         "Provide the compliance analysis in a clear JSON format."
+#     )
+#     try:
+#         response = openai.ChatCompletion.create(
+#             model="gpt-4",
+#             messages=[
+#                 {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
+#                 {"role": "user", "content": prompt},
+#             ],
+#             max_tokens=4096,
+#             n=1,
+#             stop=None,
+#             temperature=0.1,
+#         )
+#         return json.loads(response.choices[0].message["content"])
+#     except openai.error.OpenAIError as e:
+#         st.error(f"Error analyzing task compliance: {e}")
+#         return None
+# def main():
+#     start = time.time()
+#     st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)
+#     # File upload buttons one after another
+#     st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
+#     st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
+#     submit_button = st.sidebar.button("Submit")
+#     docx_file = st.session_state.get("docx_file")
+#     data_file = st.session_state.get("data_file")
+#     if submit_button and docx_file and data_file:
+#         # Clear previous information
+#         st.session_state.clear()
+#         # Extract contract text and terms
+#         contract_text = extract_text_from_docx(docx_file)
+#         extracted_terms_json = extract_terms_from_contract(contract_text)
+#         if extracted_terms_json is None:
+#             return
+#         try:
+#             contract_terms = json.loads(extracted_terms_json)
+#         except json.JSONDecodeError as e:
+#             st.error(f"JSON decoding error: {e}")
+#             return
+#         # Introducing an 8-second delay before analyzing task compliance
+#         time.sleep(8)
+#         # Read task descriptions and cost estimates from XLSX or CSV
+#         if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
+#             tasks_df = pd.read_excel(data_file)
+#         else:
+#             tasks_df = pd.read_csv(data_file)
+#         compliance_results = []
+#         # Process tasks sequentially
+#         for _, row in tasks_df.iterrows():
+#             task_description = row['Task Description']
+#             cost_estimate = row['Amount']
+#             result = analyze_task_compliance(task_description, cost_estimate, contract_text)
+#             if result is not None:
+#                 compliance_results.append(result)
+#         col1, col2 = st.columns(2)
+#         with col1:
+#             st.write("Extracted Contract Terms:")
+#             st.json(contract_terms)
+#             # Download button for contract terms
+#             st.download_button(
+#                 label="Download Contract Terms",
+#                 data=json.dumps(contract_terms, indent=4),
+#                 file_name="contract_terms.json",
+#                 mime="application/json"
+#             )
+#         with col2:
+#             st.write("Compliance Results:")
+#             st.json(compliance_results)
+#             # Download button for compliance results
+#             compliance_results_json = json.dumps(compliance_results, indent=4)
+#             st.download_button(
+#                 label="Download Compliance Results",
+#                 data=compliance_results_json,
+#                 file_name="compliance_results.json",
+#                 mime="application/json"
+#             )
+#     end = time.time()
+#     print("Total Time: ", end-start)
+# if __name__ == "__main__":
+#     main()
 import streamlit as st
 import os
 import openai
 import json
 import pandas as pd
 from docx import Document
 from dotenv import load_dotenv
 import time
+import retrying
 # Load the OpenAI API key from environment variables
 load_dotenv()
         "Provide the extracted terms in JSON format."
     )
+    try:
+        response = openai.ChatCompletion.create(
+            model="gpt-4",
+            messages=[
+                {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
+                {"role": "user", "content": prompt},
+            ],
+            max_tokens=4096,
+            n=1,
+            stop=None,
+            temperature=0.1,
+        )
+        return response.choices[0].message["content"]
+    except openai.error.OpenAIError as e:
+        st.error(f"Error extracting terms from contract: {e}")
+        return None
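+# Add a retry decorator with exponential backoff.
+# NOTE: openai.error.OpenAIError is caught inside the function (which then returns None), so the
+# retry only fires for exceptions that escape it, e.g. json.JSONDecodeError raised by json.loads.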
+@retrying.retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
+def analyze_task_compliance(task_description, cost_estimate, contract_text):
     prompt = (
         "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
         "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
         "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
         "If there are violations, list the reasons for each violation.\n\n"
+        f"Contract terms:\n{contract_text}\n\n"
         f"Task description:\n{task_description}\n"
         f"Cost estimate:\n{cost_estimate}\n\n"
         "Provide the compliance analysis in a clear JSON format."
     )
+    try:
+        response = openai.ChatCompletion.create(
+            model="gpt-4",
+            messages=[
+                {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
+                {"role": "user", "content": prompt},
+            ],
+            max_tokens=4096,
+            n=1,
+            stop=None,
+            temperature=0.1,
+        )
+        return json.loads(response.choices[0].message["content"])
+    except openai.error.OpenAIError as e:
+        st.error(f"Error analyzing task compliance: {e}")
+        return None
 def main():
+    start = time.time()
     st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)
     # File upload buttons one after another
         # Extract contract text and terms
         contract_text = extract_text_from_docx(docx_file)
         extracted_terms_json = extract_terms_from_contract(contract_text)
         if extracted_terms_json is None:
             return
         except json.JSONDecodeError as e:
             st.error(f"JSON decoding error: {e}")
             return
         # Read task descriptions and cost estimates from XLSX or CSV
         if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
             tasks_df = pd.read_excel(data_file)
             tasks_df = pd.read_csv(data_file)
         compliance_results = []
+        # Process tasks sequentially
+        for _, row in tasks_df.iterrows():
+            task_description = row['Task Description']
+            cost_estimate = row['Amount']
+            result = analyze_task_compliance(task_description, cost_estimate, contract_text)
+            if result is not None:
+                compliance_results.append(result)
         col1, col2 = st.columns(2)
         with col1:
                 file_name="compliance_results.json",
                 mime="application/json"
             )
+    end = time.time()
+    print("Total Time: ", end-start)
 if __name__ == "__main__":
     main()

requirements.txt CHANGED Viewed

@@ -5,3 +5,4 @@ python-docx
 pandas
 streamlit
 openpyxl

 pandas
 streamlit
 openpyxl
+retrying