Set max_new_tokens to 2000
#1
by
GeorgiosIoannouCoder
- opened
app.py
CHANGED
@@ -202,17 +202,17 @@ class RAGQuestionAnswering:
|
|
202 |
@st.cache_resource
|
203 |
def load_pipe():
|
204 |
print(f"Is CUDA available: {torch.cuda.is_available()}")
|
205 |
-
# True
|
206 |
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
|
207 |
-
|
208 |
-
|
209 |
-
print("torch.cuda.
|
210 |
-
|
211 |
-
|
212 |
|
213 |
-
pipe = pipeline("text-generation", model="Qwen/Qwen2.5-1.5B-Instruct", device='cuda:0', max_length=1000)
|
214 |
return(pipe)
|
|
|
215 |
self.pipe = load_pipe()
|
|
|
216 |
return None
|
217 |
|
218 |
|
@@ -266,14 +266,16 @@ class RAGQuestionAnswering:
|
|
266 |
"""
|
267 |
|
268 |
formatted_prompt = self.prompt.format(**input_dict)
|
|
|
269 |
messages=[
|
270 |
{"role": "system", "content": formatted_prompt},
|
271 |
{"role": "user", "content": input_dict["question"]},
|
272 |
]
|
273 |
-
|
274 |
response = self.pipe(messages)
|
275 |
print("#"*88)
|
276 |
print(response, type(response))
|
|
|
277 |
return str(response)
|
278 |
|
279 |
def setup_rag_chain(self) -> None:
|
|
|
202 |
@st.cache_resource
def load_pipe():
    """Create the Qwen2.5-1.5B-Instruct text-generation pipeline.

    CUDA diagnostics are printed only when a GPU is visible, because
    ``torch.cuda.current_device()`` and ``torch.cuda.get_device_name()``
    raise on a host without CUDA. The pipeline is placed on ``cuda:0``
    when CUDA is available and on the CPU otherwise.

    Returns:
        The object built by ``pipeline("text-generation", ...)``,
        configured with ``max_new_tokens=2000``.
    """
    cuda_available = torch.cuda.is_available()
    print(f"Is CUDA available: {cuda_available}")

    if cuda_available:
        # These queries are only valid once a CUDA device exists.
        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
        print(f"torch.cuda.current_device(): {torch.cuda.current_device()}")
        # NOTE: torch.cuda.device(0) is a context-manager object, so this
        # line prints its repr rather than any device details.
        print(f"torch.cuda.device(0): {torch.cuda.device(0)}")
        print(f"torch.cuda.get_device_name(0): {torch.cuda.get_device_name(0)}")

    # Fall back to the CPU instead of failing when no GPU is present.
    device = "cuda:0" if cuda_available else "cpu"
    pipe = pipeline(
        "text-generation",
        model="Qwen/Qwen2.5-1.5B-Instruct",
        device=device,
        max_new_tokens=2000,
    )
    return pipe
|
213 |
+
|
214 |
self.pipe = load_pipe()
|
215 |
+
|
216 |
return None
|
217 |
|
218 |
|
|
|
266 |
"""
|
267 |
|
268 |
formatted_prompt = self.prompt.format(**input_dict)
|
269 |
+
|
270 |
messages=[
|
271 |
{"role": "system", "content": formatted_prompt},
|
272 |
{"role": "user", "content": input_dict["question"]},
|
273 |
]
|
274 |
+
|
275 |
response = self.pipe(messages)
|
276 |
print("#"*88)
|
277 |
print(response, type(response))
|
278 |
+
|
279 |
return str(response)
|
280 |
|
281 |
def setup_rag_chain(self) -> None:
|