TejAndrewsACC committed on
Commit
d2cce01
1 Parent(s): 2971ebe

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Interactive terminal chat loop for the Qwen QwQ-32B-Preview model.

Loads the model and tokenizer once, then repeatedly reads user input,
appends it to a persistent conversation history, and prints the model's
reply. Type "exit" or "quit" (or press Ctrl-D / Ctrl-C) to end the session.
"""

from transformers import AutoModelForCausalLM, AutoTokenizer

# Model identifier on the Hugging Face Hub.
MODEL_NAME = "Qwen/QwQ-32B-Preview"

# System prompt that seeds every conversation.
SYSTEM_MESSAGE = {
    "role": "system",
    "content": (
        "You are a helpful and harmless assistant. You are Qwen developed "
        "by Alibaba. You should think step-by-step."
    ),
}


def _load_model(model_name: str):
    """Load the causal LM and its tokenizer.

    `torch_dtype="auto"` uses the checkpoint's native precision and
    `device_map="auto"` lets accelerate place/shard the weights across
    the available devices.
    """
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer


def _generate_reply(model, tokenizer, messages, max_new_tokens: int = 512) -> str:
    """Generate one assistant reply for the full conversation history.

    Applies the model's chat template, generates up to `max_new_tokens`
    tokens, and returns only the newly generated text (the prompt tokens
    are stripped before decoding).
    """
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=max_new_tokens,
    )
    # Drop the echoed prompt so only the assistant's new tokens are decoded.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]


def main() -> None:
    """Run the chat REPL, maintaining message history across turns."""
    model, tokenizer = _load_model(MODEL_NAME)
    # Copy the seed message so the module-level constant is never mutated.
    messages = [dict(SYSTEM_MESSAGE)]

    while True:
        try:
            user_input = input("User: ")
        except (EOFError, KeyboardInterrupt):
            # Fix: the original bare input() crashed with a traceback on
            # Ctrl-D / Ctrl-C instead of ending the session cleanly.
            print("\nChat session ended.")
            break

        if user_input.lower() in {"exit", "quit"}:
            print("Chat session ended.")
            break

        # Append the user turn, generate, then append the assistant turn so
        # the history persists across iterations.
        messages.append({"role": "user", "content": user_input})
        response = _generate_reply(model, tokenizer, messages)
        messages.append({"role": "assistant", "content": response})

        print(f"Assistant: {response}")


if __name__ == "__main__":
    # Fix: the original ran the model load and REPL at import time; guard the
    # entry point so the module can be imported without side effects.
    main()