Spaces:

kenken999
/

fastapi_django_main_live

Running on Zero

App Files Community

kenken999 committed on Jun 21

Commit

73eedaf

•

1 Parent(s): e91d22b

test

Browse files

Files changed (2) hide show

babyagi/babyagi.py +75 -40
babyagi/extensions/weaviate_storage.py +17 -1

babyagi/babyagi.py CHANGED Viewed

@@ -20,7 +20,16 @@ from groq import Groq
 # default opt out of chromadb telemetry.
 from chromadb.config import Settings
 client = chromadb.Client(Settings(anonymized_telemetry=False))
 # Engine configuration
@@ -43,7 +52,8 @@ COOPERATIVE_MODE = "none"
 JOIN_EXISTING_OBJECTIVE = False
 # Goal configuration
-OBJECTIVE = os.getenv("OBJECTIVE", "")
 INITIAL_TASK = os.getenv("INITIAL_TASK", os.getenv("FIRST_TASK", ""))
 # Model configuration
@@ -182,7 +192,11 @@ class LlamaEmbeddingFunction(EmbeddingFunction):
     def __call__(self, texts: Documents) -> Embeddings:
         embeddings = []
         for t in texts:
-            e = llm_embed.embed(t)
             embeddings.append(e)
         return embeddings
@@ -200,24 +214,46 @@ class DefaultResultsStorage:
         )
         metric = "cosine"
-        if LLM_MODEL.startswith("llama"):
-            embedding_function = LlamaEmbeddingFunction()
-        else:
-            embedding_function = OpenAIEmbeddingFunction(api_key=OPENAI_API_KEY)
         self.collection = chroma_client.get_or_create_collection(
             name=RESULTS_STORE_NAME,
             metadata={"hnsw:space": metric},
             embedding_function=embedding_function,
         )
     def add(self, task: Dict, result: str, result_id: str):
         # Break the function if LLM_MODEL starts with "human" (case-insensitive)
         if LLM_MODEL.startswith("human"):
             return
         # Continue with the rest of the function
-        embeddings = llm_embed.embed(result) if LLM_MODEL.startswith("llama") else None
         if (
                 len(self.collection.get(ids=[result_id], include=[])["ids"]) > 0
         ):  # Check if the result already exists
@@ -337,34 +373,35 @@ def openai_call(
     temperature: float = OPENAI_TEMPERATURE,
     max_tokens: int = 100,
 ):
-    messages=[
-        {
-            "role": "user",
-            "content": "prompt"
-        }
-    ],
-    client = Groq(api_key=os.getenv("api_key"))
-    res = ""
-    completion = client.chat.completions.create(
-                    model="llama3-8b-8192",
-                    messages=[
-                        {
-                            "role": "user",
-                            "content": prompt
-                        }
-                    ],
-                    temperature=1,
-                    max_tokens=1024,
-                    top_p=1,
-                    stream=True,
-                    stop=None,
-                )
-    for chunk in completion:
-        print(chunk.choices[0].delta.content)
-        print(chunk.choices[0].delta.content or "", end="")
-        res += chunk.choices[0].delta.content or ""
-    return res
     while True:
@@ -474,7 +511,7 @@ The number of each entry must be followed by a period. If your list is empty, wr
 Unless your list is empty, do not include any headers before your numbered list or follow your numbered list with any other output."""
     print(f'\n*****TASK CREATION AGENT PROMPT****\n{prompt}\n')
-    response = openai_call(prompt, max_tokens=2000)
     print(f'\n****TASK CREATION AGENT RESPONSE****\n{response}\n')
     new_tasks = response.split('\n')
     new_tasks_list = []
@@ -584,23 +621,21 @@ def main():
     while loop:
         # As long as there are tasks in the storage...
         if not tasks_storage.is_empty():
             # Print the task list
             print("\033[95m\033[1m" + "\n*****TASK LIST*****\n" + "\033[0m\033[0m")
             for t in tasks_storage.get_task_names():
                 print(" • " + str(t))
             # Step 1: Pull the first incomplete task
             task = tasks_storage.popleft()
             print("\033[92m\033[1m" + "\n*****NEXT TASK*****\n" + "\033[0m\033[0m")
             print(str(task["task_name"]))
             # Send to execution function to complete the task based on the context
             result = execution_agent(OBJECTIVE, str(task["task_name"]))
             print("\033[93m\033[1m" + "\n*****TASK RESULT*****\n" + "\033[0m\033[0m")
             print(result)
-            return
             # Step 2: Enrich result and store in the results storage
             # This is where you should enrich the result if needed

 # default opt out of chromadb telemetry.
 from chromadb.config import Settings
+from transformers import AutoTokenizer, AutoModel
+import torch
+import numpy
+# モデル名を指定
+model_name = "sentence-transformers/all-MiniLM-L6-v2"
+# トークナイザーとモデルをロード
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModel.from_pretrained(model_name)
 client = chromadb.Client(Settings(anonymized_telemetry=False))
 # Engine configuration
 JOIN_EXISTING_OBJECTIVE = False
 # Goal configuration
+#OBJECTIVE = os.getenv("OBJECTIVE", "")
+OBJECTIVE = "ボットの性能をよくする方法　日本語で説明"
 INITIAL_TASK = os.getenv("INITIAL_TASK", os.getenv("FIRST_TASK", ""))
 # Model configuration
     def __call__(self, texts: Documents) -> Embeddings:
         embeddings = []
         for t in texts:
+            #e = llm_embed.embed(t)
+            inputs = tokenizer(t, return_tensors="pt")
+            outputs = model(**inputs)
+            # [CLS]トークンの出力を取得
+            e = outputs.last_hidden_state[:,0,:].squeeze().detach().cpu().numpy().tolist()
             embeddings.append(e)
         return embeddings
         )
         metric = "cosine"
+        #if LLM_MODEL.startswith("llama"):
+        embedding_function = LlamaEmbeddingFunction()
+        #else:
+        #    embedding_function = OpenAIEmbeddingFunction(api_key=OPENAI_API_KEY)
         self.collection = chroma_client.get_or_create_collection(
             name=RESULTS_STORE_NAME,
             metadata={"hnsw:space": metric},
             embedding_function=embedding_function,
         )
     def add(self, task: Dict, result: str, result_id: str):
         # Break the function if LLM_MODEL starts with "human" (case-insensitive)
         if LLM_MODEL.startswith("human"):
             return
+        return
+        #from langchain_community.chat_models import ChatOpenAI
         # Continue with the rest of the function
+        #llm_embed = ChatOpenAI(model_name="lama3-70b-8192",
+        #                            openai_api_key="<REDACTED_GROQ_API_KEY>",
+        #                            openai_api_base="https://api.groq.com/openai/v1",
+        #                            )
+        #import openai
+        #openai.api_key = "<REDACTED_GROQ_API_KEY>"
+        #openai.api_base = "https://api.groq.com/openai/v1"
+        #response = openai.embeddings.create(input=result,
+        #                                    model="lama3-70b-8192",
+        #
+        inputs = tokenizer(result, return_tensors="pt")
+        outputs = model(**inputs)
+        # [CLS]トークンの出力を取得
+        embeddings = outputs.last_hidden_state[:,0,:].squeeze().detach().cpu().numpy().tolist()
+        #cls_embedding = outputs.last_hidden_state[:, 0, :].squeeze()
+        # テンソルが CPU 上にあることを確認し、NumPy 配列に変換
+        #cls_embedding_np = cls_embedding.detach().cpu().numpy()
+        #embeddings = response['data'][0]['embedding']
+        #embeddings = llm_embed.embed(result) if LLM_MODEL.startswith("llama") else None
         if (
                 len(self.collection.get(ids=[result_id], include=[])["ids"]) > 0
         ):  # Check if the result already exists
     temperature: float = OPENAI_TEMPERATURE,
     max_tokens: int = 100,
 ):
+    while True:
+        messages=[
+            {
+                "role": "user",
+                "content": "prompt"
+            }
+        ],
+        client = Groq(api_key=os.getenv("api_key"))
+        res = ""
+        print(prompt)
+        completion = client.chat.completions.create(
+                        model="llama3-8b-8192",
+                        messages=[
+                            {
+                                "role": "user",
+                                "content": prompt
+                            }
+                        ],
+                        temperature=1,
+                        max_tokens=1024,
+                        top_p=1,
+                        stream=True,
+                        stop=None,
+                    )
+        for chunk in completion:
+            #print(chunk.choices[0].delta.content)
+            #print(chunk.choices[0].delta.content or "", end="")
+            res += chunk.choices[0].delta.content or ""
+        return res
     while True:
 Unless your list is empty, do not include any headers before your numbered list or follow your numbered list with any other output."""
     print(f'\n*****TASK CREATION AGENT PROMPT****\n{prompt}\n')
+    response = openai_call(prompt, max_tokens=4000)
     print(f'\n****TASK CREATION AGENT RESPONSE****\n{response}\n')
     new_tasks = response.split('\n')
     new_tasks_list = []
     while loop:
         # As long as there are tasks in the storage...
         if not tasks_storage.is_empty():
+        #OBJECTIVE = "ボットの性能をよくする方法　日本語で説明"
             # Print the task list
             print("\033[95m\033[1m" + "\n*****TASK LIST*****\n" + "\033[0m\033[0m")
             for t in tasks_storage.get_task_names():
                 print(" • " + str(t))
             # Step 1: Pull the first incomplete task
             task = tasks_storage.popleft()
             print("\033[92m\033[1m" + "\n*****NEXT TASK*****\n" + "\033[0m\033[0m")
             print(str(task["task_name"]))
             # Send to execution function to complete the task based on the context
             result = execution_agent(OBJECTIVE, str(task["task_name"]))
             print("\033[93m\033[1m" + "\n*****TASK RESULT*****\n" + "\033[0m\033[0m")
             print(result)
             # Step 2: Enrich result and store in the results storage
             # This is where you should enrich the result if needed

babyagi/extensions/weaviate_storage.py CHANGED Viewed

@@ -6,7 +6,19 @@ from typing import Dict, List
 import openai
 import weaviate
 from weaviate.embedded import EmbeddedOptions
 def can_import(module_name):
     try:
@@ -121,7 +133,11 @@ class WeaviateResultsStorage:
     # Get embedding for the text
     def get_embedding(self, text: str) -> list:
         text = text.replace("\n", " ")
         if self.llm_model.startswith("llama"):
             from llama_cpp import Llama

 import openai
 import weaviate
 from weaviate.embedded import EmbeddedOptions
+# default opt out of chromadb telemetry.
+from chromadb.config import Settings
+from transformers import AutoTokenizer, AutoModel
+import torch
+import numpy
+# モデル名を指定
+model_name = "sentence-transformers/all-MiniLM-L6-v2"
+# トークナイザーとモデルをロード
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModel.from_pretrained(model_name)
+client = chromadb.Client(Settings(anonymized_telemetry=False))
 def can_import(module_name):
     try:
     # Get embedding for the text
     def get_embedding(self, text: str) -> list:
         text = text.replace("\n", " ")
+        inputs = tokenizer(text, return_tensors="pt")
+        outputs = model(**inputs)
+        # [CLS]トークンの出力を取得
+        embeddings = outputs.last_hidden_state[:,0,:].squeeze().detach().cpu().numpy().tolist()
+        return embeddings
         if self.llm_model.startswith("llama"):
             from llama_cpp import Llama