kdevoe committed
Commit a47f900
Parent(s): fa7af89

Reverting to hosting a single model; the comparison with the baseline was taking too long.

Files changed (1):
  1. app.py (+32, -54)
app.py CHANGED
@@ -6,41 +6,41 @@ from langchain.memory import ConversationBufferMemory
 # Move model to device (GPU if available)
 device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

-# Load the tokenizer (same tokenizer for both models since both are GPT-2 based)
+# Load the tokenizer (you can use the pre-trained tokenizer for GPT-2 family)
 tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")

-# Load the baseline model (pre-trained DistilGPT2)
-baseline_model = GPT2LMHeadModel.from_pretrained("distilgpt2").to(device)
+# Manually create a configuration for the model (since we don't have config.json)
+config = GPT2Config.from_pretrained("distilgpt2")

-# Load the fine-tuned model using its configuration and state dictionary
-# You should have a local fine-tuned model file for this (pytorch_model_100.bin)
-fine_tuned_config = GPT2Config.from_pretrained("distilgpt2")
-fine_tuned_model = GPT2LMHeadModel(fine_tuned_config)
+# Initialize the model using the manually created configuration
+model = GPT2LMHeadModel(config)

-# Load the fine-tuned weights
-model_path = "./pytorch_model_100.bin"  # Path to your fine-tuned model file
-state_dict = torch.load(model_path, map_location=device)
-fine_tuned_model.load_state_dict(state_dict)
-fine_tuned_model.to(device)
+# Load the weights from the pytorch_model.bin file
+model_path = "./pytorch_model_100.bin"  # Path to local model file
+state_dict = torch.load(model_path, map_location=device)  # Load the state_dict
+model.load_state_dict(state_dict)  # Load the state dict into the model
+
+# Move model to the device (GPU or CPU)
+model.to(device)

 # Set up conversational memory using LangChain's ConversationBufferMemory
 memory = ConversationBufferMemory()

-# Define the chatbot function with both baseline and fine-tuned models
-def chat_with_both_models(input_text, temperature, top_p, top_k):
+# Define the chatbot function with memory and additional parameters
+def chat_with_distilgpt2(input_text, temperature, top_p, top_k):
     # Retrieve conversation history
     conversation_history = memory.load_memory_variables({})['history']

-    # Combine the conversation history with the user input (or just use input directly)
+    # Combine the (possibly summarized) history with the current user input
     no_memory_input = f"Question: {input_text}\nAnswer:"

     # Tokenize the input and convert to tensor
     input_ids = tokenizer.encode(no_memory_input, return_tensors="pt").to(device)

-    # Generate response from baseline DistilGPT2
-    baseline_outputs = baseline_model.generate(
+    # Generate the response using the model with adjusted parameters
+    outputs = model.generate(
         input_ids,
-        max_length=input_ids.shape[1] + 50,
+        max_length=input_ids.shape[1] + 50,  # Limit total length
         max_new_tokens=15,
         num_return_sequences=1,
         no_repeat_ngram_size=3,
@@ -48,57 +48,35 @@ def chat_with_both_models(input_text, temperature, top_p, top_k):
         early_stopping=True,
         pad_token_id=tokenizer.eos_token_id,
         eos_token_id=tokenizer.eos_token_id,
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k
+        temperature=temperature,  # Add temperature from slider
+        top_p=top_p,              # Add top_p from slider
+        top_k=top_k               # Add top_k from slider
     )

-    # Decode the baseline model output
-    baseline_response = tokenizer.decode(baseline_outputs[0], skip_special_tokens=True)
+    # Decode the model output
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

-    # Generate response from the fine-tuned DistilGPT2
-    fine_tuned_outputs = fine_tuned_model.generate(
-        input_ids,
-        max_length=input_ids.shape[1] + 50,
-        max_new_tokens=15,
-        num_return_sequences=1,
-        no_repeat_ngram_size=3,
-        repetition_penalty=1.2,
-        early_stopping=True,
-        pad_token_id=tokenizer.eos_token_id,
-        eos_token_id=tokenizer.eos_token_id,
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k
-    )
-
-    # Decode the fine-tuned model output
-    fine_tuned_response = tokenizer.decode(fine_tuned_outputs[0], skip_special_tokens=True)
-
-    # Update the memory with the user input and responses from both models
-    memory.save_context({"input": input_text}, {"baseline_output": baseline_response, "fine_tuned_output": fine_tuned_response})
-
-    # Return both responses
-    return baseline_response, fine_tuned_response
+    # Update the memory with the user input and model response
+    memory.save_context({"input": input_text}, {"output": response})
+
+    return response

 # Set up the Gradio interface with additional sliders
 interface = gr.Interface(
-    fn=chat_with_both_models,
+    fn=chat_with_distilgpt2,
     inputs=[
         gr.Textbox(label="Chat with DistilGPT-2"),  # User input text
         gr.Slider(0.1, 1.0, step=0.1, value=1.0, label="Temperature"),  # Slider for temperature
         gr.Slider(0.0, 1.0, step=0.1, value=1.0, label="Top-p"),  # Slider for top-p
         gr.Slider(1, 100, step=1, value=50, label="Top-k")  # Slider for top-k
     ],
-    outputs=[
-        gr.Textbox(label="Baseline DistilGPT-2's Response"),  # Baseline model response
-        gr.Textbox(label="Fine-tuned DistilGPT-2's Response")  # Fine-tuned model response
-    ],
-    title="DistilGPT-2 Chatbot: Baseline vs Fine-tuned",
-    description="This app compares the responses of a baseline DistilGPT-2 and a fine-tuned version for each input prompt. You can adjust temperature, top-p, and top-k using the sliders.",
+    outputs=gr.Textbox(label="DistilGPT-2's Response"),  # Model response
+    title="DistilGPT-2 Chatbot with Memory and Adjustable Parameters",
+    description="This is a simple chatbot powered by the DistilGPT-2 model with conversational memory, using LangChain. You can adjust temperature, top-p, and top-k using the sliders.",
 )

 # Launch the Gradio app
 interface.launch()

+How can this be modified to give the results for both a baseline DistilGPT2 and the fine tuned version for each input prompt?
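As a quick sanity check of the reverted single-model app, the chat function from this commit can also be called directly, outside the Gradio UI. The sketch below is illustrative only and not part of the commit: it assumes it runs in the same module where chat_with_distilgpt2 is defined, reuses the sliders' default values (1.0, 1.0, 50) from the interface above, and uses a made-up example prompt.

# Minimal sketch (not part of this commit): call the chat function directly,
# passing the default values of the Temperature, Top-p, and Top-k sliders.
reply = chat_with_distilgpt2(
    "What is DistilGPT-2?",  # illustrative prompt, not taken from the commit
    temperature=1.0,         # Temperature slider default
    top_p=1.0,               # Top-p slider default
    top_k=50,                # Top-k slider default
)
print(reply)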