Set max_new_tokens to 2000

#1
Files changed (1)
  1. app.py +10 -8
app.py CHANGED
@@ -202,17 +202,17 @@ class RAGQuestionAnswering:
         @st.cache_resource
         def load_pipe():
             print(f"Is CUDA available: {torch.cuda.is_available()}")
-            # True
             print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
-
-            #######################
-            print("torch.cuda.current_device()", torch.cuda.current_device())
-            print("torch.cuda.device(0)", torch.cuda.device(0))
-            print("torch.cuda.get_device_name(0)",torch.cuda.get_device_name(0))
+            print(f"torch.cuda.current_device(): {torch.cuda.current_device()}")
+            print(f"torch.cuda.device(0): {torch.cuda.device(0)}")
+            print(f"torch.cuda.get_device_name(0): {torch.cuda.get_device_name(0)}")
+
+            pipe = pipeline("text-generation", model="Qwen/Qwen2.5-1.5B-Instruct", device='cuda:0', max_new_tokens=2000)
 
-            pipe = pipeline("text-generation", model="Qwen/Qwen2.5-1.5B-Instruct", device='cuda:0', max_length=1000)
             return(pipe)
+
         self.pipe = load_pipe()
+
         return None
 
 
@@ -266,14 +266,16 @@ class RAGQuestionAnswering:
         """
 
         formatted_prompt = self.prompt.format(**input_dict)
+
         messages=[
             {"role": "system", "content": formatted_prompt},
             {"role": "user", "content": input_dict["question"]},
         ]
-        # Use a pipeline as a high-level helper
+
         response = self.pipe(messages)
         print("#"*88)
         print(response, type(response))
+
         return str(response)
 
     def setup_rag_chain(self) -> None:
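
The substantive change is swapping `max_length=1000` for `max_new_tokens=2000`. In transformers, `max_length` caps prompt plus generated tokens, so a long RAG prompt can consume most or all of the budget before generation starts; `max_new_tokens` caps only the generated continuation. A minimal sketch of the difference (illustrative, not part of this PR; the message contents are made up, and the result access assumes the chat-style return format that text-generation pipelines use for message lists):

```python
# Illustrative sketch of max_length vs. max_new_tokens
# (model and device match the PR; messages are hypothetical).
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-1.5B-Instruct",
    device="cuda:0",
    max_new_tokens=2000,  # budget applies to generated tokens only
)

messages = [
    {"role": "system", "content": "Answer using the provided context."},
    {"role": "user", "content": "What does the retriever return?"},
]

# With max_length=1000, prompt tokens count against the same budget, so a
# long retrieved context can leave little or no room for the answer. With
# max_new_tokens=2000, the model can always emit up to 2000 new tokens.
response = pipe(messages)

# For chat-style input, the pipeline returns the conversation with the
# assistant's reply appended as the last message.
print(response[0]["generated_text"][-1]["content"])
```

A related note on the unchanged code: `return str(response)` stringifies the entire returned structure, including the system and user messages. If only the answer text is wanted, extracting `response[0]["generated_text"][-1]["content"]` as sketched above would likely be cleaner.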