Update app.py
app.py
CHANGED
@@ -110,58 +110,35 @@ if userinput and api_key and st.button("Extract Claims", key="claims_extraction"
 # Display generated objectives for all chunks
 learning_status_placeholder.text(f"Patentable Claims Extracted!\n{all_extracted_claims.strip()}")
-with st.expander("See transcript"):
-    st.markdown(transcript)
-
-    "Select the model you want to use:",
-    ["gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-3.5-turbo", "gpt-4-0314", "gpt-4-0613", "gpt-4"]
-)
-
-#
-# userinput = st.text_input("Input Text:", "Freeform text here!") # Commented out, as it's updated above
-
-# Initialize
-
-#
-st.
-# Initialize autogenerated objectives
-claims_extraction = ""
-# Initialize status placeholder
-learning_status_placeholder = st.empty()
-disable_button_bool = False
-
-input_chunks = chunk_text(userinput)
-
-claims_extraction_response = openai.ChatCompletion.create(
-    model=model_choice,
-    messages=[
-        {"role": "user", "content": f"Extract any patentable claims from the following: \n {chunk}. \n Extract each claim. Briefly explain why you extracted this word phrase. Exclude any additional commentary."}
-    ]
-)
-
-# Append the extracted claims from the current chunk to the overall results
-all_extracted_claims += claims_extraction.strip()
-
-# Save the generated objectives to session state
-st.session_state.claims_extraction = all_extracted_claims
-
-# Display generated objectives for all chunks
-learning_status_placeholder.text(f"Patentable Claims Extracted!\n{all_extracted_claims.strip()}")
+from transformers import AutoConfig, AutoTokenizer, AutoModel
+from summarizer import Summarizer
+
+# Define the BERT-based model name
+model_name = 'nlpaueb/legal-bert-base-uncased'
+
+# Initialize BERT-based model and tokenizer
+custom_config = AutoConfig.from_pretrained(model_name)
+custom_config.output_hidden_states = True
+custom_tokenizer = AutoTokenizer.from_pretrained(model_name)
+custom_model = AutoModel.from_pretrained(model_name, config=custom_config)
+bert_legal_model = Summarizer(custom_model=custom_model, custom_tokenizer=custom_tokenizer)
+print('Using model {}\n'.format(model_name))
+
+# Get the extracted claims from Streamlit's session state
+claims_extracted = st.session_state.claims_extraction
+
+# Define the chunk size
+chunk_size = 350
+
+# Split the extracted claims into chunks
+chunks = [claims_extracted[i:i+chunk_size] for i in range(0, len(claims_extracted), chunk_size)]
+
+# Process each chunk with the BERT-based model
+summaries = []
+for chunk in chunks:
+    summary = bert_legal_model(chunk, min_length=8, ratio=0.05)
+    summaries.append(summary)
+
+# Now you have a list of summaries for each chunk
+# You can access them using `summaries[0]`, `summaries[1]`, etc.
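
The added loop leaves the per-chunk results sitting in `summaries` without displaying them. A minimal sketch (not part of this commit) of one way to surface them in the existing Streamlit UI; the expander label and the joining strategy are illustrative assumptions:

# Sketch, not in the commit: join the per-chunk summaries and show them in the app
final_summary = "\n\n".join(s for s in summaries if s)  # skip chunks that summarized to nothing
with st.expander("See claim summaries"):
    st.markdown(final_summary)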
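One caveat in the new chunking: `chunk_size = 350` counts characters, not tokens, so it stays safely under legal-bert's 512-token window but can split a claim mid-sentence. A hypothetical token-based alternative, reusing the `custom_tokenizer` the commit already loads (an assumption, not what the commit does):

# Hypothetical alternative: chunk by tokens so no chunk can exceed the 512-token window
token_ids = custom_tokenizer.encode(claims_extracted, add_special_tokens=False)
max_tokens = 350  # headroom below the model's 512-token limit
chunks = [custom_tokenizer.decode(token_ids[i:i + max_tokens])
          for i in range(0, len(token_ids), max_tokens)]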