File size: 9,018 Bytes
5fc6a9d
 
 
 
 
 
e9584dc
 
5fc6a9d
e9584dc
5fc6a9d
e9584dc
 
 
 
 
 
 
 
 
 
 
 
 
5fc6a9d
1705208
 
 
 
 
 
 
 
 
 
 
 
 
 
5fc6a9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9584dc
 
 
 
 
 
f4f51e9
 
 
 
 
 
 
 
 
e9584dc
 
 
 
 
 
 
 
 
 
 
 
 
 
5fc6a9d
 
 
 
 
e9584dc
5fc6a9d
 
 
 
 
e9584dc
 
 
 
 
 
f4f51e9
 
 
 
 
 
 
 
e9584dc
f4f51e9
1705208
e9584dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5fc6a9d
 
1705208
 
 
 
 
 
 
 
 
 
 
64efe54
 
 
5fc6a9d
 
 
e9584dc
64efe54
 
 
5fc6a9d
 
 
 
 
e9584dc
1705208
 
 
 
 
5fc6a9d
 
e9584dc
 
 
 
 
 
 
 
5fc6a9d
e9584dc
 
 
 
 
 
 
 
5fc6a9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import streamlit as st
import os
import json
import pandas as pd
from docx import Document
from dotenv import load_dotenv
from openai import AzureOpenAI
from concurrent.futures import ThreadPoolExecutor, as_completed

# Load environment variables (python-dotenv) before any of them is read below.
load_dotenv()

# Connection settings for the Azure OpenAI deployment used by this app.
# Only the API key comes from the environment; the rest is fixed here.
key = os.environ.get("AZURE_OPENAI_API_KEY")
endpoint_url = "https://interview-key.openai.azure.com/"
api_version = "2024-05-01-preview"
deployment_id = "interview"

# One module-level client shared by every request the helpers below make.
client = AzureOpenAI(
    azure_endpoint=endpoint_url,
    api_key=key,
    api_version=api_version,
)

# Page-level configuration; issued before any other Streamlit output.
st.set_page_config(layout="wide")

# Stylesheet for the centered page title rendered by main().
_CENTERED_TITLE_CSS = """
    <style>
    .centered-title {
        text-align: center;
        font-size: 2.5em;
        margin-top: 0;
    }
    </style>
    """
st.markdown(_CENTERED_TITLE_CSS, unsafe_allow_html=True)

def extract_text_from_docx(docx_path):
    """Return the text of every paragraph in a DOCX document.

    Args:
        docx_path: Anything ``docx.Document`` can open. ``main()`` passes the
            uploaded file object rather than a filesystem path.

    Returns:
        A single string with the paragraph texts joined by newlines, in
        document order.
    """
    paragraphs = Document(docx_path).paragraphs
    lines = []
    for paragraph in paragraphs:
        lines.append(paragraph.text)
    return "\n".join(lines)

def extract_terms_from_contract(contract_text):
    """Ask the Azure OpenAI deployment to extract the key terms of a contract.

    Uses the module-level ``client`` and ``deployment_id``.

    Args:
        contract_text: Plain text of the contract to analyze.

    Returns:
        The raw message content returned by the model. The prompt asks for
        JSON, but the text is neither parsed nor validated here. Returns
        ``None`` when the request fails; the failure is reported through
        ``st.error``.
    """
    # Function-scope imports: neither name is imported at the top of the file.
    import time

    from openai import RateLimitError

    prompt = (
        "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
        "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
        "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
        "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
        "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
        "contains multiple terms, list them all.\n\n"
        "Contract text:\n"
        f"{contract_text}\n\n"
        "Provide the extracted terms in JSON format."
    )

    retries = 2
    wait_time = 1  # seconds; doubled after each rate-limited attempt
    for attempt in range(retries):
        try:
            response = client.chat.completions.create(
                model=deployment_id,
                messages=[
                    {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=4096,
                n=1,
                stop=None,
                temperature=0.1,
            )
            # Kept inside the try so an unexpected response shape is reported
            # through st.error instead of propagating to the caller.
            return response.choices[0].message.content
        except RateLimitError:
            # Only rate limiting is retried; every other failure is final.
            if attempt == retries - 1:
                st.error("Rate limit exceeded. Please try again later.")
                return None
            st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
            wait_time *= 2  # Exponential backoff
        except Exception as e:
            st.error(f"Error extracting terms from contract: {e}")
            return None
    return None

def analyze_task_compliance(task_description, cost_estimate, contract_terms):
    """Check one task and its cost estimate against the extracted contract terms.

    Uses the module-level ``client`` and ``deployment_id``. The completion is
    requested as a stream; the streamed fragments are concatenated and the
    full text is parsed as JSON.

    Args:
        task_description: Description of the task to check.
        cost_estimate: Cost estimate for the task; interpolated into the
            prompt as text.
        contract_terms: JSON-serialisable contract terms; embedded in the
            prompt with ``json.dumps``.

    Returns:
        The model's compliance analysis parsed from JSON, or ``None`` when the
        request fails or the reply is not valid JSON. Failures are reported
        through ``st.error``.

    NOTE(review): ``main()`` runs this function in worker threads; Streamlit
    calls made from there may not be rendered -- confirm against the
    Streamlit threading documentation.
    """
    # Function-scope imports: neither name is imported at the top of the file.
    import time

    from openai import RateLimitError

    prompt = (
        "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
        "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
        "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
        "If there are violations, list the reasons for each violation.\n\n"
        f"Contract terms:\n{json.dumps(contract_terms, indent=4)}\n\n"
        f"Task description:\n{task_description}\n"
        f"Cost estimate:\n{cost_estimate}\n\n"
        "Provide the compliance analysis in a clear JSON format."
    )

    retries = 5
    wait_time = 1  # seconds; doubled after each rate-limited attempt
    for attempt in range(retries):
        try:
            response = client.chat.completions.create(
                model=deployment_id,
                messages=[
                    {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=4096,
                n=1,
                stop=None,
                temperature=0.1,
                stream=True,
            )

            # Stream chunks are objects, not dicts, so use attribute access.
            # A chunk may carry no choices, and a delta may carry no content;
            # both are skipped. Fragments are not rendered one by one: a
            # partial fragment is not valid JSON on its own.
            fragments = []
            for chunk in response:
                if not chunk.choices:
                    continue
                piece = chunk.choices[0].delta.content
                if piece:
                    fragments.append(piece)

            return json.loads("".join(fragments))
        except RateLimitError:
            # Only rate limiting is retried; every other failure is final.
            if attempt == retries - 1:
                st.error("Rate limit exceeded. Please try again later.")
                return None
            st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
            wait_time *= 2  # Exponential backoff
        except Exception as e:
            st.error(f"Error analyzing task compliance: {e}")
            return None
    return None

def main():
    """Render the app: upload inputs, run the analysis, show and offer results.

    The sidebar collects a contract (DOCX) and a task sheet (XLSX or CSV).
    On submit, the contract terms are extracted once, every task row is
    checked against them concurrently, and both the extracted terms and the
    compliance results are displayed with download buttons.

    The task sheet must contain the columns ``Task Description`` and
    ``Amount``.
    """
    st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)

    # File upload buttons one after another
    st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
    st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
    submit_button = st.sidebar.button("Submit")

    docx_file = st.session_state.get("docx_file")
    data_file = st.session_state.get("data_file")

    if not submit_button:
        return
    if not (docx_file and data_file):
        # Tell the user why nothing happened instead of silently ignoring the click.
        st.warning("Please upload both a contract document and a task file before submitting.")
        return

    # Clear previous information
    st.session_state.clear()

    # Extract contract text and terms
    contract_text = extract_text_from_docx(docx_file)
    extracted_terms_json = extract_terms_from_contract(contract_text)

    if extracted_terms_json is None:
        return

    try:
        contract_terms = json.loads(extracted_terms_json)
    except json.JSONDecodeError as e:
        st.error(f"JSON decoding error: {e}")
        return

    # Read task descriptions and cost estimates from XLSX or CSV
    try:
        if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            tasks_df = pd.read_excel(data_file)
        else:
            tasks_df = pd.read_csv(data_file)
    except Exception as e:
        # UI boundary: report an unreadable upload instead of crashing the page.
        st.error(f"Could not read task file: {e}")
        return

    # Fail early with a clear message rather than a KeyError inside the loop.
    required_columns = ["Task Description", "Amount"]
    missing_columns = [name for name in required_columns if name not in tasks_df.columns]
    if missing_columns:
        st.error(f"Task file is missing required column(s): {', '.join(missing_columns)}")
        return

    compliance_results = []
    futures = []

    # Use ThreadPoolExecutor to analyze tasks concurrently
    with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
        for _, row in tasks_df.iterrows():
            task_description = row['Task Description']
            cost_estimate = row['Amount']
            futures.append(executor.submit(analyze_task_compliance, task_description, cost_estimate, contract_terms))

        # Collect in submission order so results follow the order of the task
        # rows; the tasks still run concurrently.
        for future in futures:
            try:
                result = future.result()
            except Exception as e:
                st.error(f"An error occurred: {e}")
                continue
            if result is not None:
                compliance_results.append(result)

    col1, col2 = st.columns(2)

    with col1:
        st.write("Extracted Contract Terms:")
        st.json(contract_terms)

        # Download button for contract terms
        st.download_button(
            label="Download Contract Terms",
            data=json.dumps(contract_terms, indent=4),
            file_name="contract_terms.json",
            mime="application/json"
        )

    with col2:
        st.write("Compliance Results:")
        st.json(compliance_results)

        # Download button for compliance results
        compliance_results_json = json.dumps(compliance_results, indent=4)
        st.download_button(
            label="Download Compliance Results",
            data=compliance_results_json,
            file_name="compliance_results.json",
            mime="application/json"
        )

# Build the page only when this file is run as the entry script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()