# Hugging Face Space: Bikas0 / Contract-Conditions-Extraction-and-Verification (status: Sleeping)
# File size: 9,018 Bytes

import streamlit as st
import os
import json
import pandas as pd
from docx import Document
from dotenv import load_dotenv
from openai import AzureOpenAI
from concurrent.futures import ThreadPoolExecutor, as_completed

# Load environment variables
# NOTE(review): presumably this reads a local .env file so that
# AZURE_OPENAI_API_KEY below resolves — confirm against the deployment setup.
load_dotenv()

# Azure OpenAI credentials
# Only the API key is read from the environment (os.getenv returns None when the
# variable is unset); the endpoint, API version and deployment name are hard-coded.
key = os.getenv("AZURE_OPENAI_API_KEY")
endpoint_url = "https://interview-key.openai.azure.com/"
api_version = "2024-05-01-preview"
# Passed as `model=` by the chat-completion calls in the functions below.
deployment_id = "interview"

# Initialize Azure OpenAI client
# A single module-level client shared by extract_terms_from_contract and
# analyze_task_compliance.
client = AzureOpenAI(
    api_version=api_version,
    azure_endpoint=endpoint_url,
    api_key=key
)

# Streamlit app layout
# NOTE(review): Streamlit expects set_page_config to run before other st.* page
# commands — keep it ahead of the st.markdown call below.
st.set_page_config(layout="wide")

# Add custom CSS for center alignment
# Defines the `.centered-title` class that main() applies to its <h1> heading.
st.markdown("""
    <style>
    .centered-title {
        text-align: center;
        font-size: 2.5em;
        margin-top: 0;
    }
    </style>
    """, unsafe_allow_html=True)

def extract_text_from_docx(docx_path):
    """Return the text of every paragraph in a DOCX document, one per line.

    Args:
        docx_path: Whatever ``Document`` accepts as its source; main() passes
            the uploaded contract file.

    Returns:
        The paragraph texts joined with newline characters.
    """
    document = Document(docx_path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)

def extract_terms_from_contract(contract_text):
    """Ask the Azure OpenAI deployment to extract a contract's key terms as JSON text.

    Args:
        contract_text: Plain text of the contract to analyze.

    Returns:
        The model's reply as a string that is expected to hold JSON (a
        surrounding Markdown code fence, if present, is removed), or None when
        the request fails or the reply has no content. Every failure is
        reported on the page with ``st.error``.
    """
    prompt = (
        "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
        "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
        "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
        "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
        "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
        "contains multiple terms, list them all.\n\n"
        "Contract text:\n"
        f"{contract_text}\n\n"
        "Provide the extracted terms in JSON format."
    )

    # A single attempt is made: the previous retry loop returned on its first
    # iteration on every path, so it never actually retried.
    # NOTE(review): the OpenAI SDK client is documented to retry transient
    # failures itself (`max_retries`) — confirm before adding another layer.
    try:
        response = client.chat.completions.create(
            model=deployment_id,
            messages=[
                {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=4096,
            n=1,
            stop=None,
            temperature=0.1,
        )
        content = response.choices[0].message.content
    except Exception as e:
        st.error(f"Error extracting terms from contract: {e}")
        return None

    if not content:
        # Without this, the caller would stop silently on a None result.
        st.error("Error extracting terms from contract: the model returned an empty response.")
        return None

    return _strip_markdown_fence(content)


def _strip_markdown_fence(text):
    """Return ``text`` without a surrounding Markdown ``` code fence, if it has one."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    # The opening fence occupies the whole first line (e.g. ```json).
    _, newline, body = stripped.partition("\n")
    if not newline:
        return stripped
    body = body.rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()

def analyze_task_compliance(task_description, cost_estimate, contract_terms):
    """Check one task and its cost estimate against the extracted contract terms.

    The reply is streamed from the Azure OpenAI deployment, concatenated, and
    parsed as JSON.

    Args:
        task_description: Text describing the task.
        cost_estimate: Cost estimate for the task; interpolated into the prompt.
        contract_terms: JSON-serializable contract terms; embedded in the
            prompt via ``json.dumps``.

    Returns:
        The parsed JSON compliance analysis, or None when the request fails or
        the reply is not valid JSON (the failure is passed to ``st.error``).
    """
    prompt = (
        "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
        "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
        "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
        "If there are violations, list the reasons for each violation.\n\n"
        f"Contract terms:\n{json.dumps(contract_terms, indent=4)}\n\n"
        f"Task description:\n{task_description}\n"
        f"Cost estimate:\n{cost_estimate}\n\n"
        "Provide the compliance analysis in a clear JSON format."
    )

    # NOTE(review): main() runs this function on ThreadPoolExecutor workers;
    # Streamlit calls made from such threads may not reach the page — consider
    # surfacing failures through the return value instead of st.error.
    try:
        stream = client.chat.completions.create(
            model=deployment_id,
            messages=[
                {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=4096,
            n=1,
            stop=None,
            temperature=0.1,
            stream=True,
        )

        # Stream chunks are objects, not dicts, so they are read by attribute.
        # Fragments are only accumulated: rendering each one (and passing a
        # partial fragment to st.json) cannot produce meaningful output.
        pieces = []
        for chunk in stream:
            # Skip chunks that carry no choices or no text delta
            # (presumably metadata / role-only chunks).
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece:
                pieces.append(piece)

        return _parse_model_json("".join(pieces))

    except Exception as e:
        st.error(f"Error analyzing task compliance: {e}")
        return None


def _parse_model_json(reply_text):
    """Parse a model reply as JSON, tolerating a surrounding Markdown ``` fence."""
    cleaned = reply_text.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (e.g. ```json) and the closing fence.
        _, newline, remainder = cleaned.partition("\n")
        if newline:
            cleaned = remainder.rstrip()
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]
    return json.loads(cleaned)

def main():
    """Render the Contract Compliance Analyzer page and run an analysis on submit.

    The sidebar collects a DOCX contract and an XLSX/CSV task sheet. On
    "Submit", the contract terms are extracted, every row of the sheet is
    checked against them concurrently, and both results are shown side by side
    with download buttons. The sheet must contain the columns
    ``Task Description`` and ``Amount``.
    """
    st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)

    # File upload buttons one after another
    st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
    st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
    submit_button = st.sidebar.button("Submit")

    docx_file = st.session_state.get("docx_file")
    data_file = st.session_state.get("data_file")

    if not submit_button:
        return
    if not (docx_file and data_file):
        # Tell the user why nothing happened instead of silently ignoring the click.
        st.warning("Please upload both a contract document and a task description file before submitting.")
        return

    # Clear previous information
    st.session_state.clear()

    # Extract contract text and terms
    contract_text = extract_text_from_docx(docx_file)
    extracted_terms_json = extract_terms_from_contract(contract_text)

    if extracted_terms_json is None:
        return

    try:
        contract_terms = json.loads(extracted_terms_json)
    except json.JSONDecodeError as e:
        st.error(f"JSON decoding error: {e}")
        return

    # Read task descriptions and cost estimates from XLSX or CSV
    if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
        tasks_df = pd.read_excel(data_file)
    else:
        tasks_df = pd.read_csv(data_file)

    # Validate the sheet up front so a wrong header yields a readable message
    # rather than a KeyError raised from inside the row loop.
    required_columns = ("Task Description", "Amount")
    missing_columns = [name for name in required_columns if name not in tasks_df.columns]
    if missing_columns:
        st.error(f"Task file is missing required column(s): {', '.join(missing_columns)}")
        return

    # Use ThreadPoolExecutor to analyze tasks concurrently
    with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
        futures = [
            executor.submit(
                analyze_task_compliance,
                row['Task Description'],
                row['Amount'],
                contract_terms,
            )
            for _, row in tasks_df.iterrows()
        ]

    # Leaving the `with` block waits for every task. Results are gathered in
    # submission order so the output follows the row order of the task sheet
    # (failed tasks, which yield None, are still omitted).
    compliance_results = []
    for future in futures:
        try:
            result = future.result()
        except Exception as e:
            st.error(f"An error occurred: {e}")
            continue
        if result is not None:
            compliance_results.append(result)

    col1, col2 = st.columns(2)

    with col1:
        st.write("Extracted Contract Terms:")
        st.json(contract_terms)

        # Download button for contract terms
        st.download_button(
            label="Download Contract Terms",
            data=json.dumps(contract_terms, indent=4),
            file_name="contract_terms.json",
            mime="application/json"
        )

    with col2:
        st.write("Compliance Results:")
        st.json(compliance_results)

        # Download button for compliance results
        compliance_results_json = json.dumps(compliance_results, indent=4)
        st.download_button(
            label="Download Compliance Results",
            data=compliance_results_json,
            file_name="compliance_results.json",
            mime="application/json"
        )

if __name__ == "__main__":
    main()