Tonic committed on
Commit
85e4d89
1 Parent(s): 7d18e6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -47
app.py CHANGED
@@ -110,58 +110,35 @@ if userinput and api_key and st.button("Extract Claims", key="claims_extraction"
110
 
111
  # Display generated objectives for all chunks
112
  learning_status_placeholder.text(f"Patentable Claims Extracted!\n{all_extracted_claims.strip()}")
113
# --- Claim-extraction UI: transcript viewer, model picker, OpenAI call. ---

# Let the user inspect the raw transcript that was processed.
with st.expander("See transcript"):
    st.markdown(transcript)

# Model selection dropdown for the extraction call below.
model_choice = st.selectbox(
    "Select the model you want to use:",
    ["gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-3.5-turbo", "gpt-4-0314", "gpt-4-0613", "gpt-4"]
)

# System-style context for the extractor.
# NOTE(review): defined but never referenced in this span — presumably
# intended to be sent as a system message in the API call; confirm.
context = "You are a patent claims identifier and extractor. You will freeform text, identify any claims contained therein that may be patentable. You identify, extract, print such claims, briefly explain why each claim is patentable."

# Initialize the OpenAI API key when one was provided.
if api_key:
    openai.api_key = api_key

# Extracted-claims section header.
st.write("### Patentable Claims:")
# Holds the most recent chunk's extraction result.
claims_extraction = ""
# Placeholder reused for per-chunk progress messages and the final result.
learning_status_placeholder = st.empty()
disable_button_bool = False

if userinput and api_key and st.button("Extract Claims", key="claims_extraction", disabled=disable_button_bool):
    # Split the user input into chunks small enough for the model.
    input_chunks = chunk_text(userinput)

    # Collects the claims extracted from every chunk.
    all_extracted_claims = ""

    # enumerate() yields the correct 1-based chunk number even when two
    # chunks have identical text; the original input_chunks.index(chunk)
    # repeats the first match's position for duplicates and costs O(n)
    # per iteration.
    for chunk_number, chunk in enumerate(input_chunks, start=1):
        # Show progress for the chunk currently being processed.
        learning_status_placeholder.text(f"Extracting Patentable Claims for chunk {chunk_number}...")

        # API call to extract claims from the current chunk.
        claims_extraction_response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[
                {"role": "user", "content": f"Extract any patentable claims from the following: \n {chunk}. \n Extract each claim. Briefly explain why you extracted this word phrase. Exclude any additional commentary."}
            ]
        )

        # Pull the generated claims text out of the API response.
        claims_extraction = claims_extraction_response['choices'][0]['message']['content']

        # Append the extracted claims from the current chunk.
        all_extracted_claims += claims_extraction.strip()

    # Persist the combined result so later reruns / downstream steps can use it.
    st.session_state.claims_extraction = all_extracted_claims

    # Display the combined extraction result.
    learning_status_placeholder.text(f"Patentable Claims Extracted!\n{all_extracted_claims.strip()}")
 
110
 
111
  # Display generated objectives for all chunks
112
  learning_status_placeholder.text(f"Patentable Claims Extracted!\n{all_extracted_claims.strip()}")
 
 
113
 
114
from transformers import AutoConfig, AutoTokenizer, AutoModel
from summarizer import Summarizer

# BERT model pre-trained on legal text, used for extractive summarization
# of the claims produced by the extraction step above.
model_name = 'nlpaueb/legal-bert-base-uncased'

# Initialize the BERT-based model and tokenizer.
# output_hidden_states must be enabled: Summarizer builds sentence
# embeddings from the model's hidden states.
custom_config = AutoConfig.from_pretrained(model_name)
custom_config.output_hidden_states = True
custom_tokenizer = AutoTokenizer.from_pretrained(model_name)
custom_model = AutoModel.from_pretrained(model_name, config=custom_config)
bert_legal_model = Summarizer(custom_model=custom_model, custom_tokenizer=custom_tokenizer)
print('Using model {}\n'.format(model_name))

# Get the extracted claims from Streamlit's session state.
# .get() with a default avoids the AttributeError the original attribute
# access raised on every rerun before the "Extract Claims" button had
# stored anything (this module-level code runs on each rerun).
claims_extracted = st.session_state.get("claims_extraction", "")

# Character count per chunk fed to the summarizer.
chunk_size = 350

# Split the extracted claims into fixed-size character chunks.
chunks = [claims_extracted[i:i + chunk_size] for i in range(0, len(claims_extracted), chunk_size)]

# Summarize each chunk with the BERT-based model.
summaries = []
for chunk in chunks:
    summary = bert_legal_model(chunk, min_length=8, ratio=0.05)
    summaries.append(summary)

# Now you have a list of summaries for each chunk
# You can access them using `summaries[0]`, `summaries[1]`, etc.