Set max_new_tokens to 2000

#1
Files changed (1)
  1. app.py +10 -8
app.py CHANGED
@@ -202,17 +202,17 @@ class RAGQuestionAnswering:
         @st.cache_resource
         def load_pipe():
             print(f"Is CUDA available: {torch.cuda.is_available()}")
-            # True
             print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
-
-            #######################
-            print("torch.cuda.current_device()", torch.cuda.current_device())
-            print("torch.cuda.device(0)", torch.cuda.device(0))
-            print("torch.cuda.get_device_name(0)",torch.cuda.get_device_name(0))
+            print(f"torch.cuda.current_device(): {torch.cuda.current_device()}")
+            print(f"torch.cuda.device(0): {torch.cuda.device(0)}")
+            print(f"torch.cuda.get_device_name(0): {torch.cuda.get_device_name(0)}")
+
+            pipe = pipeline("text-generation", model="Qwen/Qwen2.5-1.5B-Instruct", device='cuda:0', max_new_tokens=2000)
 
-            pipe = pipeline("text-generation", model="Qwen/Qwen2.5-1.5B-Instruct", device='cuda:0', max_length=1000)
             return(pipe)
+
         self.pipe = load_pipe()
+
         return None
 
 
@@ -266,14 +266,16 @@ class RAGQuestionAnswering:
         """
 
         formatted_prompt = self.prompt.format(**input_dict)
+
         messages=[
             {"role": "system", "content": formatted_prompt},
             {"role": "user", "content": input_dict["question"]},
         ]
-        # Use a pipeline as a high-level helper
+
         response = self.pipe(messages)
         print("#"*88)
         print(response, type(response))
+
         return str(response)
 
     def setup_rag_chain(self) -> None:
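
The substantive change is swapping `max_length=1000` for `max_new_tokens=2000`. In transformers, `max_length` caps prompt plus generated tokens, so a long RAG prompt can consume most or all of the budget before generation starts; `max_new_tokens` caps only the generated continuation. A minimal sketch of the difference (illustrative, not part of this PR; the message contents are made up, and the result access assumes the chat-style return format that text-generation pipelines use for message lists):

```python
# Illustrative sketch of max_length vs. max_new_tokens
# (model and device match the PR; messages are hypothetical).
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-1.5B-Instruct",
    device="cuda:0",
    max_new_tokens=2000,  # budget applies to generated tokens only
)

messages = [
    {"role": "system", "content": "Answer using the provided context."},
    {"role": "user", "content": "What does the retriever return?"},
]

# With max_length=1000, prompt tokens count against the same budget, so a
# long retrieved context can leave little or no room for the answer. With
# max_new_tokens=2000, the model can always emit up to 2000 new tokens.
response = pipe(messages)

# For chat-style input, the pipeline returns the conversation with the
# assistant's reply appended as the last message.
print(response[0]["generated_text"][-1]["content"])
```

A related note on the unchanged code: `return str(response)` stringifies the entire returned structure, including the system and user messages. If only the answer text is wanted, extracting `response[0]["generated_text"][-1]["content"]` as sketched above would likely be cleaner.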