kdevoe committed
Commit: fa7af89
Parent: 90d8219

Update app.py

Files changed (1):
  1. app.py +55 -31
app.py CHANGED
@@ -6,41 +6,60 @@ from langchain.memory import ConversationBufferMemory
  # Move model to device (GPU if available)
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

- # Load the tokenizer (you can use the pre-trained tokenizer for GPT-2 family)
+ # Load the tokenizer (same tokenizer for both models since both are GPT-2 based)
  tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")

- # Manually create a configuration for the model (since we don't have config.json)
- config = GPT2Config.from_pretrained("distilgpt2")
+ # Load the baseline model (pre-trained DistilGPT2)
+ baseline_model = GPT2LMHeadModel.from_pretrained("distilgpt2").to(device)

- # Initialize the model using the manually created configuration
- model = GPT2LMHeadModel(config)
+ # Load the fine-tuned model using its configuration and state dictionary
+ # You should have a local fine-tuned model file for this (pytorch_model_100.bin)
+ fine_tuned_config = GPT2Config.from_pretrained("distilgpt2")
+ fine_tuned_model = GPT2LMHeadModel(fine_tuned_config)

- # Load the weights from the pytorch_model.bin file
- model_path = "./pytorch_model_100.bin"  # Path to local model file
- state_dict = torch.load(model_path, map_location=device)  # Load the state_dict
- model.load_state_dict(state_dict)  # Load the state dict into the model
-
- # Move model to the device (GPU or CPU)
- model.to(device)
+ # Load the fine-tuned weights
+ model_path = "./pytorch_model_100.bin"  # Path to your fine-tuned model file
+ state_dict = torch.load(model_path, map_location=device)
+ fine_tuned_model.load_state_dict(state_dict)
+ fine_tuned_model.to(device)

  # Set up conversational memory using LangChain's ConversationBufferMemory
  memory = ConversationBufferMemory()

- # Define the chatbot function with memory and additional parameters
- def chat_with_distilgpt2(input_text, temperature, top_p, top_k):
+ # Define the chatbot function with both baseline and fine-tuned models
+ def chat_with_both_models(input_text, temperature, top_p, top_k):
      # Retrieve conversation history
      conversation_history = memory.load_memory_variables({})['history']

-     # Combine the (possibly summarized) history with the current user input
+     # Combine the conversation history with the user input (or just use input directly)
      no_memory_input = f"Question: {input_text}\nAnswer:"
-
+
      # Tokenize the input and convert to tensor
      input_ids = tokenizer.encode(no_memory_input, return_tensors="pt").to(device)

-     # Generate the response using the model with adjusted parameters
-     outputs = model.generate(
+     # Generate response from baseline DistilGPT2
+     baseline_outputs = baseline_model.generate(
+         input_ids,
+         max_length=input_ids.shape[1] + 50,
+         max_new_tokens=15,
+         num_return_sequences=1,
+         no_repeat_ngram_size=3,
+         repetition_penalty=1.2,
+         early_stopping=True,
+         pad_token_id=tokenizer.eos_token_id,
+         eos_token_id=tokenizer.eos_token_id,
+         temperature=temperature,
+         top_p=top_p,
+         top_k=top_k
+     )
+
+     # Decode the baseline model output
+     baseline_response = tokenizer.decode(baseline_outputs[0], skip_special_tokens=True)
+
+     # Generate response from the fine-tuned DistilGPT2
+     fine_tuned_outputs = fine_tuned_model.generate(
          input_ids,
-         max_length=input_ids.shape[1] + 50,  # Limit total length
+         max_length=input_ids.shape[1] + 50,
          max_new_tokens=15,
          num_return_sequences=1,
          no_repeat_ngram_size=3,
@@ -48,33 +67,38 @@ def chat_with_distilgpt2(input_text, temperature, top_p, top_k):
          early_stopping=True,
          pad_token_id=tokenizer.eos_token_id,
          eos_token_id=tokenizer.eos_token_id,
-         temperature=temperature,  # Add temperature from slider
-         top_p=top_p,  # Add top_p from slider
-         top_k=top_k  # Add top_k from slider
+         temperature=temperature,
+         top_p=top_p,
+         top_k=top_k
      )

-     # Decode the model output
-     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     # Decode the fine-tuned model output
+     fine_tuned_response = tokenizer.decode(fine_tuned_outputs[0], skip_special_tokens=True)

-     # Update the memory with the user input and model response
-     memory.save_context({"input": input_text}, {"output": response})
+     # Update the memory with the user input and responses from both models
+     memory.save_context({"input": input_text}, {"baseline_output": baseline_response, "fine_tuned_output": fine_tuned_response})

-     return response
+     # Return both responses
+     return baseline_response, fine_tuned_response

  # Set up the Gradio interface with additional sliders
  interface = gr.Interface(
-     fn=chat_with_distilgpt2,
+     fn=chat_with_both_models,
      inputs=[
          gr.Textbox(label="Chat with DistilGPT-2"),  # User input text
          gr.Slider(0.1, 1.0, step=0.1, value=1.0, label="Temperature"),  # Slider for temperature
          gr.Slider(0.0, 1.0, step=0.1, value=1.0, label="Top-p"),  # Slider for top-p
          gr.Slider(1, 100, step=1, value=50, label="Top-k")  # Slider for top-k
      ],
-     outputs=gr.Textbox(label="DistilGPT-2's Response"),  # Model response
-     title="DistilGPT-2 Chatbot with Memory and Adjustable Parameters",
-     description="This is a simple chatbot powered by the DistilGPT-2 model with conversational memory, using LangChain. You can adjust temperature, top-p, and top-k using the sliders.",
+     outputs=[
+         gr.Textbox(label="Baseline DistilGPT-2's Response"),  # Baseline model response
+         gr.Textbox(label="Fine-tuned DistilGPT-2's Response")  # Fine-tuned model response
+     ],
+     title="DistilGPT-2 Chatbot: Baseline vs Fine-tuned",
+     description="This app compares the responses of a baseline DistilGPT-2 and a fine-tuned version for each input prompt. You can adjust temperature, top-p, and top-k using the sliders.",
  )

  # Launch the Gradio app
  interface.launch()

+
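One detail worth flagging in the new chat_with_both_models function: depending on the installed LangChain version, ConversationBufferMemory.save_context generally expects a single output key (unless memory.output_key is set), so passing both baseline_output and fine_tuned_output may raise a ValueError at runtime. A minimal sketch of a possible workaround, assuming that behaviour, is to merge the two replies into one entry; the combined-string format below is illustrative only:

    # Hypothetical sketch: store both replies under a single output key, since
    # ConversationBufferMemory may reject a context dict with two output keys.
    combined_response = f"Baseline: {baseline_response}\nFine-tuned: {fine_tuned_response}"
    memory.save_context({"input": input_text}, {"output": combined_response})

This keeps one history entry per turn while leaving the two-textbox Gradio output unchanged.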