crystal-technologies committed on
Commit 38057e4
1 Parent(s): e34adac

Upload 1671 files

.gitattributes ADDED
@@ -0,0 +1,6 @@
+ CircumSpect/object_detection/weights/groundingdino_swint_ogc.pth filter=lfs diff=lfs merge=lfs -text
+ Perceptrix/finetune/scripts/eval/local_data/reading_comprehension/coqa.jsonl filter=lfs diff=lfs merge=lfs -text
+ Perceptrix/finetune/scripts/eval/local_data/reading_comprehension/narrative_qa.jsonl filter=lfs diff=lfs merge=lfs -text
+ Perceptrix/finetune/scripts/eval/local_data/symbolic_problem_solving/bigbench_elementary_math_qa.jsonl filter=lfs diff=lfs merge=lfs -text
+ SoundScribe/SpeakerID/tools/speech_data_explorer/screenshot.png filter=lfs diff=lfs merge=lfs -text
+ SoundScribe/voices/Vatsal.wav filter=lfs diff=lfs merge=lfs -text
CircumSpect/object_detection/__pycache__/detect_objects.cpython-39.pyc CHANGED
Binary files a/CircumSpect/object_detection/__pycache__/detect_objects.cpython-39.pyc and b/CircumSpect/object_detection/__pycache__/detect_objects.cpython-39.pyc differ
 
CircumSpect/object_detection/detect_objects.py CHANGED
@@ -83,5 +83,5 @@ def locate_object(objects, image):

  if __name__ == "__main__":
      frame, coord = locate_object(
-         "https://images.nationalgeographic.org/image/upload/v1638890052/EducationHub/photos/robots-3d-landing-page.jpg", "drawer")
+         "drawer", "https://images.nationalgeographic.org/image/upload/v1638890052/EducationHub/photos/robots-3d-landing-page.jpg")
      print(coord)
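As a quick usage sketch, the corrected call order matches the locate_object(objects, image) signature in the hunk header: the object query comes first, the image second. The import path and image file below are assumptions for illustration, not part of the commit.

# Hypothetical caller, assuming the repository layout is importable as a package.
from CircumSpect.object_detection.detect_objects import locate_object

frame, coord = locate_object("drawer", "robots-3d-landing-page.jpg")  # query first, image second
print(coord)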
SoundScribe/transcribe.py CHANGED
@@ -26,7 +26,6 @@ queued = False
  def transcribe(audio):
      result = model.transcribe(audio)
      transcription = result['text']
-     print(transcription)
      # user = find_user("database/recording.wav")
      user = "Vatsal"
      if user != "Crystal":
@@ -73,8 +72,7 @@ def listen(model, stream):
  if queued:
      transcription()
      queued = False
- else:
-     print("No audio found")
+
  silence_duration = 0
  output_file.close()
  audio_data = None
api_host.py CHANGED
@@ -9,134 +9,87 @@ import os

  model = whisper.load_model("base")

-
  def transcribe(audio):
      result = model.transcribe(audio)
      transcription = result['text']
      print(transcription)
      return transcription

-
  app = Flask(__name__)

- @app.route('/locate_object', methods=['POST'])
- def display_image():
-     try:
-         image_data = request.json['image_data']
-         prompt = request.json['prompt']
-
-         image_data = np.array(image_data, dtype=np.uint8)
-         image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
-
-         cv2.imwrite('API.jpg', image)
-         answer, annotated_image = locate_object(prompt, "API.jpg")
-
-         return jsonify({'message': answer, 'annotated_image': annotated_image})
-
-     except Exception as e:
-         return jsonify({'error': str(e)})
-
-
- @app.route('/vqa', methods=['POST'])
- def display_image():
-     try:
-         image_data = request.json['image_data']
-         prompt = request.json['prompt']
-
-         image_data = np.array(image_data, dtype=np.uint8)
-         image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
-
-         cv2.imwrite('API.jpg', image)
-         answer = answer_question(prompt, "API.jpg")
-
-         return jsonify({'message': answer})
-
-     except Exception as e:
-         return jsonify({'error': str(e)})
-
-
- @app.route('/object_description', methods=['POST'])
- def display_image():
-     try:
-         image_data = request.json['image_data']
-
-         image_data = np.array(image_data, dtype=np.uint8)
-         image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
-
-         cv2.imwrite('API.jpg', image)
-         answer = find_object_description("API.jpg")
-
-         return jsonify({'message': answer})
-
-     except Exception as e:
-         return jsonify({'error': str(e)})
-
-
- @app.route('/perceptrix', methods=['POST'])
- def display_image():
-     try:
-         prompt = request.json['prompt']
-
-         answer = perceptrix(prompt)
-
-         return jsonify({'message': answer})
-
-     except Exception as e:
-         return jsonify({'error': str(e)})
-
-
- @app.route('/robotix', methods=['POST'])
- def display_image():
-     try:
-         prompt = request.json['prompt']
-
-         answer = robotix(prompt)
-
-         return jsonify({'message': answer})
-
-     except Exception as e:
-         return jsonify({'error': str(e)})
-
-
- @app.route('/search_keyword', methods=['POST'])
- def display_image():
-     try:
-         prompt = request.json['prompt']
-
-         answer = search_keyword(prompt)
-
-         return jsonify({'message': answer})
-
-     except Exception as e:
-         return jsonify({'error': str(e)})
-
-
- @app.route('/identify_objects_from_text', methods=['POST'])
- def display_image():
-     try:
-         prompt = request.json['prompt']
-
-         answer = identify_objects_from_text(prompt)
-
-         return jsonify({'message': answer})
-
-     except Exception as e:
-         return jsonify({'error': str(e)})
-
-
- @app.route('/transcribe', methods=['POST'])
- def upload_audio():
-     audio_file = request.files['audio']
-
-     filename = os.path.join("database", audio_file.filename)
-     audio_file.save(filename)
-     return jsonify({'message': transcribe(filename)})
-
+ @app.route('/', methods=['POST', 'GET'])
+ def home():
+     return jsonify({'message': 'WORKING'})
+
+ def handle_request(func, *args):
+     try:
+         result = func(*args)
+         return jsonify({'message': result})
+     except Exception as e:
+         print(e)
+         return jsonify({'error': str(e)})
+
+ @app.route('/locate_object', methods=['POST', 'GET'])
+ def _locate_object():
+     image_data = request.json['image']
+     prompt = request.json['prompt']
+     image_data = np.array(image_data, dtype=np.uint8)
+     image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
+     cv2.imwrite('API.jpg', image)
+     return handle_request(locate_object, prompt, "API.jpg")
+
+ @app.route('/vqa', methods=['POST', 'GET'])
+ def _vqa():
+     image_data = request.json['image']
+     prompt = request.json['prompt']
+     image_data = np.array(image_data, dtype=np.uint8)
+     image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
+     cv2.imwrite('API.jpg', image)
+     return handle_request(answer_question, prompt, "API.jpg")
+
+ @app.route('/object_description', methods=['POST', 'GET'])
+ def _object_description():
+     image_data = request.json['image']
+     image_data = np.array(image_data, dtype=np.uint8)
+     image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
+     cv2.imwrite('API.jpg', image)
+     return handle_request(find_object_description, "API.jpg")
+
+ @app.route('/perceptrix', methods=['POST', 'GET'])
+ def _perceptrix():
+     prompt = request.json['prompt']
+     return handle_request(perceptrix, prompt)
+
+ @app.route('/robotix', methods=['POST', 'GET'])
+ def _robotix():
+     prompt = request.json['prompt']
+     return handle_request(robotix, prompt)
+
+ @app.route('/search_keyword', methods=['POST', 'GET'])
+ def _search_keyword():
+     prompt = request.json['prompt']
+     return handle_request(search_keyword, prompt)
+
+ @app.route('/identify_objects_from_text', methods=['POST', 'GET'])
+ def _identify_objects_from_text():
+     prompt = request.json['prompt']
+     return handle_request(identify_objects_from_text, prompt)
+
+ @app.route('/transcribe', methods=['POST', 'GET'])
+ def _upload_audio():
+     try:
+         audio_file = request.files['audio']
+         filename = os.path.join("./", audio_file.filename)
+         audio_file.save(filename)
+         print("RECEIVED")
+         return jsonify({'message': transcribe(filename)})
+     except Exception as e:
+         print(e)
+         return jsonify({'message': "Error"})

  def run_app():
      app.run(port=7777)

  if __name__ == "__main__":
      runner = threading.Thread(target=run_app)
-     runner.start()
+     runner.start()
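For context, a minimal client-side sketch against the consolidated routes above (not part of the commit). It assumes the Flask app is running locally on port 7777, as app.run(port=7777) suggests; the base URL and sample image path are illustrative.

# Hypothetical client for the refactored api_host.py endpoints.
import cv2
import requests

BASE_URL = "http://localhost:7777/"  # assumption: api_host.py running on the same machine

# Text-only endpoints take a JSON body with a single 'prompt' key.
reply = requests.post(BASE_URL + "perceptrix", json={"prompt": "Hello CRYSTAL"}).json()
print(reply.get("message") or reply.get("error"))

# Image endpoints expect the JPEG-encoded bytes as a list under 'image', plus a 'prompt'.
frame = cv2.imread("database/current_frame.jpg")  # illustrative image path
_, encoded = cv2.imencode(".jpg", frame)
located = requests.post(BASE_URL + "locate_object",
                        json={"prompt": "drawer", "image": encoded.tolist()}).json()
print(located)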
crystal.py CHANGED
@@ -1,5 +1,5 @@
  from utils import setup_device, check_api_usage, record_chat, load_chat, get_time
- from SoundScribe.speakerID import find_user
+ # from SoundScribe.speakerID import find_user
  from SoundScribe import speak, live_listen
  from internet import get_weather_data
  import threading
@@ -30,7 +30,7 @@ AUTOMATION_COMAND = "Home Automation"
  weather = None

  device = setup_device()
- print("INITIALIZING CRYSTAL -", "Running on" if str(device) == "cpu" else "Running with", str(device).upper(),
+ print("INITIALIZING CRYSTAL - DETECTED DEVICE:", str(device).upper(),
        "Acceleration" if str(device) != "cpu" else "")


@@ -51,7 +51,6 @@ def understand_surroundings():
      while True:
          current_events = answer_question(
              "Describe your surroundings", "./database/current_frame.jpg")
-
          time.sleep(10)


@@ -134,7 +133,8 @@ visual_processing.start()

  while True:
      full_history = load_chat()
-     username = find_user("recording.wav")
+     # username = find_user("recording.wav")
+     username = "Vatsal"
      with open("database/notes.txt", "r") as notes:
          notes = notes.read()
      input_text = f"Time- {get_time()}\nWeather- {weather}\nSurroundings- {current_events}"+(f"\nNotes- {notes}" if notes else "")
@@ -171,6 +171,7 @@ while True:
      relevant_history = f"{relevant_history}\n{username}: " + \
          "\n" + input_text + "\nCRYSTAL: "
      response = str(perceptrix(relevant_history))
+     response = "<###CRYSTAL-INTERNAL###> Speech\n"+response
      with open("./database/input.txt", 'w') as clearfile:
          clearfile.write("")

database/current_frame.jpg CHANGED
database/input.txt CHANGED
@@ -1 +1 @@
- No.
+ Hello. Can you hear me?
database/notes.txt ADDED
File without changes
database/recording.wav CHANGED
Binary files a/database/recording.wav and b/database/recording.wav differ
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
+ git+https://github.com/coqui-ai/TTS
  absl-py
  accelerate
  addict
@@ -109,7 +110,6 @@ gql
  graphql-core
  greenlet
  grpcio
- gTTS
  gunicorn
  h11
  h2
utils.py CHANGED
@@ -58,6 +58,11 @@ def check_api_usage():
  "\t2. Use On-Device Calculations\n"
  "Enter your choice (1/2): ")
  USE_CLOUD_API = choice == "1"
+ if USE_CLOUD_API:
+     print("RUNNING ON CLOUD")
+ else:
+     print("RUNNING LOCALLY")
+
  else:
  print("CRYSTAL Cloud API not reachable.")
  else:
@@ -68,7 +73,7 @@ def check_api_usage():


  def perceptrix(prompt):
-     url = API_URL+"perceptrix/"
+     url = API_URL+"perceptrix"

      payload = {'prompt': prompt}
      headers = {'Content-Type': 'application/json'}
@@ -78,7 +83,7 @@ def perceptrix(prompt):


  def robotix(prompt):
-     url = API_URL+"robotix/"
+     url = API_URL+"robotix"

      payload = {'prompt': prompt}
      headers = {'Content-Type': 'application/json'}
@@ -88,7 +93,7 @@ def robotix(prompt):


  def identify_objects_from_text(prompt):
-     url = API_URL+"identify_objects_from_text/"
+     url = API_URL+"identify_objects_from_text"

      payload = {'prompt': prompt}
      headers = {'Content-Type': 'application/json'}
@@ -98,7 +103,7 @@ def identify_objects_from_text(prompt):


  def search_keyword(prompt):
-     url = API_URL+"search_keyword/"
+     url = API_URL+"search_keyword"

      payload = {'prompt': prompt}
      headers = {'Content-Type': 'application/json'}
@@ -108,7 +113,10 @@ def search_keyword(prompt):


  def answer_question(prompt, frame):
-     url = API_URL+"vqa/"
+     url = API_URL+"vqa"
+     if type(frame) == str:
+         frame = cv2.imread(frame)
+
      _, image_data = cv2.imencode('.jpg', frame)
      image = image_data.tolist()

@@ -121,7 +129,9 @@ def answer_question(prompt, frame):


  def find_object_description(prompt, frame):
-     url = API_URL+"object_description/"
+     url = API_URL+"object_description"
+     if type(frame) == str:
+         frame = cv2.imread(frame)
      _, image_data = cv2.imencode('.jpg', frame)
      image = image_data.tolist()

@@ -134,7 +144,9 @@ def find_object_description(prompt, frame):


  def locate_object(prompt, frame):
-     url = API_URL+"locate_object/"
+     url = API_URL+"locate_object"
+     if type(frame) == str:
+         frame = cv2.imread(frame)
      _, image_data = cv2.imencode('.jpg', frame)
      image = image_data.tolist()

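A small usage sketch of the updated helpers, which now accept either a file path or an in-memory frame (not part of the commit; the import and image path are assumptions for illustration, and API_URL must point at a running api_host.py instance).

# Hypothetical caller code.
import cv2
from utils import answer_question, locate_object  # assumes utils.py is importable

# Passing a path: the helper now calls cv2.imread() on it before encoding.
print(answer_question("What do you see?", "./database/current_frame.jpg"))

# Passing an already-decoded frame still works as before.
frame = cv2.imread("./database/current_frame.jpg")
print(locate_object("drawer", frame))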