File size: 9,018 Bytes
5fc6a9d e9584dc 5fc6a9d e9584dc 5fc6a9d e9584dc 5fc6a9d 1705208 5fc6a9d e9584dc f4f51e9 e9584dc 5fc6a9d e9584dc 5fc6a9d e9584dc f4f51e9 e9584dc f4f51e9 1705208 e9584dc 5fc6a9d 1705208 64efe54 5fc6a9d e9584dc 64efe54 5fc6a9d e9584dc 1705208 5fc6a9d e9584dc 5fc6a9d e9584dc 5fc6a9d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 |
import json
import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import pandas as pd
import streamlit as st
from docx import Document
from dotenv import load_dotenv
from openai import AzureOpenAI
from openai import RateLimitError
# Load environment variables (python-dotenv) before any of them are read below.
load_dotenv()

# Azure OpenAI connection settings. Only the API key is read from the
# environment; the endpoint, API version and deployment name are fixed here.
deployment_id = "interview"
api_version = "2024-05-01-preview"
endpoint_url = "https://interview-key.openai.azure.com/"
key = os.environ.get("AZURE_OPENAI_API_KEY")

# Module-wide client used by the chat-completion helpers defined below.
client = AzureOpenAI(
    api_key=key,
    azure_endpoint=endpoint_url,
    api_version=api_version,
)
# Streamlit app layout: use the full page width. Kept ahead of the first
# element-rendering Streamlit call in this file.
st.set_page_config(layout="wide")

# Custom CSS for the centered page title; main() renders an <h1> that uses
# the "centered-title" class defined here.
_CENTERED_TITLE_CSS = """
<style>
.centered-title {
text-align: center;
font-size: 2.5em;
margin-top: 0;
}
</style>
"""
st.markdown(_CENTERED_TITLE_CSS, unsafe_allow_html=True)
def extract_text_from_docx(docx_path):
    """Return the text of a Word document as a single string.

    Args:
        docx_path: Passed straight to ``docx.Document``; main() supplies the
            uploaded contract file from the sidebar.

    Returns:
        The ``text`` of each entry in the document's ``paragraphs``, in
        order, joined with newline characters.
    """
    document = Document(docx_path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)
def extract_terms_from_contract(contract_text):
    """Ask the Azure OpenAI deployment to extract the key terms of a contract.

    Args:
        contract_text: Plain text of the contract.

    Returns:
        The model's reply as a string. The prompt asks for JSON, but the
        reply is returned unparsed; the caller is responsible for decoding
        it. Returns ``None`` when the request fails; request failures are
        reported with ``st.error`` instead of being raised.
    """
    prompt = (
        "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
        "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
        "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
        "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
        "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
        "contains multiple terms, list them all.\n\n"
        "Contract text:\n"
        f"{contract_text}\n\n"
        "Provide the extracted terms in JSON format."
    )
    retries = 2
    wait_time = 1
    for attempt in range(retries):
        try:
            response = client.chat.completions.create(
                model=deployment_id,
                messages=[
                    {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=4096,
                n=1,
                stop=None,
                temperature=0.1,
            )
            return response.choices[0].message.content
        except RateLimitError:
            # Rate limiting is the only failure worth retrying. Previously
            # every exception returned on the first pass, so the loop and
            # its back-off settings never took effect.
            if attempt < retries - 1:
                st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
                wait_time *= 2  # Exponential backoff
            else:
                st.error("Rate limit exceeded. Please try again later.")
                return None
        except Exception as e:
            # UI boundary: surface the problem to the user and signal failure
            # to the caller with None rather than crashing the page.
            st.error(f"Error extracting terms from contract: {e}")
            return None
    return None
def analyze_task_compliance(task_description, cost_estimate, contract_terms):
    """Check one task and its cost estimate against the extracted contract terms.

    main() submits this function to a thread pool, one call per task row.

    Args:
        task_description: Description of the task to check.
        cost_estimate: Cost estimate associated with the task.
        contract_terms: JSON-serialisable contract terms; they are embedded
            in the prompt via ``json.dumps``.

    Returns:
        The model's reply decoded with ``json.loads``, or ``None`` when the
        request fails or the reply is not valid JSON. Failures are reported
        with ``st.error`` instead of being raised.
    """
    prompt = (
        "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
        "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
        "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
        "If there are violations, list the reasons for each violation.\n\n"
        f"Contract terms:\n{json.dumps(contract_terms, indent=4)}\n\n"
        f"Task description:\n{task_description}\n"
        f"Cost estimate:\n{cost_estimate}\n\n"
        "Provide the compliance analysis in a clear JSON format."
    )
    retries = 5
    wait_time = 1
    for attempt in range(retries):
        try:
            response = client.chat.completions.create(
                model=deployment_id,
                messages=[
                    {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=4096,
                n=1,
                stop=None,
                temperature=0.1,
                stream=True,
            )
            # Fragments are accumulated, not rendered one by one: a single
            # fragment is not a JSON document, and main() displays the
            # parsed results once every task has finished.
            fragments = []
            for chunk in response:
                # The client yields chunk objects, not dicts, so fields are
                # read as attributes. A chunk may carry no choices at all
                # (e.g. a metadata-only chunk), and a delta may have no text.
                if not chunk.choices:
                    continue
                chunk_text = chunk.choices[0].delta.content
                if chunk_text:
                    fragments.append(chunk_text)
            return json.loads("".join(fragments))
        except RateLimitError:
            # Rate limiting is the only failure worth retrying; with several
            # tasks in flight at once it is also the most likely one.
            if attempt < retries - 1:
                st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
                wait_time *= 2  # Exponential backoff
            else:
                st.error("Rate limit exceeded. Please try again later.")
                return None
        except Exception as e:
            # Covers request errors and a reply that is not valid JSON.
            st.error(f"Error analyzing task compliance: {e}")
            return None
    return None
def main():
    """Render the analyzer page and run an analysis when Submit is pressed.

    The sidebar takes a contract (DOCX) and a task table (XLSX or CSV) that
    must contain the columns ``Task Description`` and ``Amount``. On submit,
    the contract terms are extracted once, every task row is checked against
    them concurrently, and both the terms and the per-task results are shown
    side by side with JSON download buttons.

    Problems (missing uploads, a failed extraction, undecodable JSON, missing
    columns) are reported on the page and end the run early.
    """
    st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)

    # File upload buttons one after another
    st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
    st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
    submit_button = st.sidebar.button("Submit")
    docx_file = st.session_state.get("docx_file")
    data_file = st.session_state.get("data_file")

    if not submit_button:
        return
    if not (docx_file and data_file):
        # Tell the user why nothing happened instead of silently ignoring the click.
        st.warning("Please upload both a contract document and a task file before submitting.")
        return

    # Clear previous information
    st.session_state.clear()

    # Extract contract text and terms
    contract_text = extract_text_from_docx(docx_file)
    extracted_terms_json = extract_terms_from_contract(contract_text)
    if extracted_terms_json is None:
        return
    try:
        contract_terms = json.loads(extracted_terms_json)
    except json.JSONDecodeError as e:
        st.error(f"JSON decoding error: {e}")
        return

    # Read task descriptions and cost estimates from XLSX or CSV
    if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
        tasks_df = pd.read_excel(data_file)
    else:
        tasks_df = pd.read_csv(data_file)

    # Fail with a readable message rather than a KeyError inside the loop.
    required_columns = ("Task Description", "Amount")
    missing_columns = [name for name in required_columns if name not in tasks_df.columns]
    if missing_columns:
        st.error(f"Task file is missing required column(s): {', '.join(missing_columns)}")
        return

    compliance_results = []
    futures = []
    # Use ThreadPoolExecutor to analyze tasks concurrently
    with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
        for _, row in tasks_df.iterrows():
            task_description = row['Task Description']
            cost_estimate = row['Amount']
            futures.append(executor.submit(analyze_task_compliance, task_description, cost_estimate, contract_terms))
        # Collect in submission order so results follow the order of the
        # input rows; the calls still run concurrently. Completion order
        # would differ from run to run.
        for future in futures:
            try:
                result = future.result()
            except Exception as e:
                st.error(f"An error occurred: {e}")
                continue
            if result is not None:
                compliance_results.append(result)

    col1, col2 = st.columns(2)
    with col1:
        st.write("Extracted Contract Terms:")
        st.json(contract_terms)
        # Download button for contract terms
        st.download_button(
            label="Download Contract Terms",
            data=json.dumps(contract_terms, indent=4),
            file_name="contract_terms.json",
            mime="application/json"
        )
    with col2:
        st.write("Compliance Results:")
        st.json(compliance_results)
        # Download button for compliance results
        compliance_results_json = json.dumps(compliance_results, indent=4)
        st.download_button(
            label="Download Compliance Results",
            data=compliance_results_json,
            file_name="compliance_results.json",
            mime="application/json"
        )
# Entry point: build the page only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|