dh-mc committed on
Commit
4cae0a4
1 Parent(s): 5a380be
app_modules/llm_inference.py CHANGED
@@ -69,41 +69,40 @@ class LLMInference(metaclass=abc.ABCMeta):
69
  return result
70
 
71
  def _execute_chain(self, chain, inputs, q, sh):
72
- self.llm_loader.lock.acquire()
73
- try:
74
- q.put(chain(inputs, callbacks=[sh]))
75
- finally:
76
- # Release the lock
77
- self.llm_loader.lock.release()
78
 
79
  def _run_chain(self, chain, inputs, streaming_handler):
80
- que = Queue()
81
-
82
- t = Thread(
83
- target=self._execute_chain,
84
- args=(chain, inputs, que, streaming_handler),
85
- )
86
- t.start()
87
 
88
- if self.llm_loader.streamer is not None and isinstance(
89
- self.llm_loader.streamer, TextIteratorStreamer
90
- ):
91
- count = (
92
- 2
93
- if "chat_history" in inputs and len(inputs.get("chat_history")) > 0
94
- else 1
95
  )
96
-
97
- while count > 0:
98
- try:
99
- for token in self.llm_loader.streamer:
100
- streaming_handler.on_llm_new_token(token)
101
-
102
- self.llm_loader.streamer.reset()
103
- count -= 1
104
- except Exception:
105
- print("nothing generated yet - retry in 0.5s")
106
- time.sleep(0.5)
107
-
108
- t.join()
109
- return que.get()
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  return result
70
 
71
  def _execute_chain(self, chain, inputs, q, sh):
72
+ q.put(chain(inputs, callbacks=[sh]))
 
 
 
 
 
73
 
74
  def _run_chain(self, chain, inputs, streaming_handler):
75
+ self.llm_loader.lock.acquire()
76
+ try:
77
+ que = Queue()
 
 
 
 
78
 
79
+ t = Thread(
80
+ target=self._execute_chain,
81
+ args=(chain, inputs, que, streaming_handler),
 
 
 
 
82
  )
83
+ t.start()
84
+
85
+ if self.llm_loader.streamer is not None and isinstance(
86
+ self.llm_loader.streamer, TextIteratorStreamer
87
+ ):
88
+ count = (
89
+ 2
90
+ if "chat_history" in inputs and len(inputs.get("chat_history")) > 0
91
+ else 1
92
+ )
93
+
94
+ while count > 0:
95
+ try:
96
+ for token in self.llm_loader.streamer:
97
+ streaming_handler.on_llm_new_token(token)
98
+
99
+ self.llm_loader.streamer.reset()
100
+ count -= 1
101
+ except Exception:
102
+ print("nothing generated yet - retry in 0.5s")
103
+ time.sleep(0.5)
104
+
105
+ t.join()
106
+ return que.get()
107
+ finally:
108
+ self.llm_loader.lock.release()
notebooks/YT_LLaMA2_7B_Chat_LangChain_Basics.ipynb DELETED
The diff for this file is too large to render. See raw diff