Spaces:

Cosmos-AI
/

Cosmos-BETA-v1

Runtime error

App Files Community

wop committed on Mar 10

Commit

5f05d95

•

1 Parent(s): a802a63

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -20

app.py CHANGED Viewed

@@ -1,12 +1,15 @@
 import os
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
-import gradio as gr
-import pandas as pd
 from datasets import load_dataset
-from tensorflow.keras.models import load_model
 from tensorflow.keras.preprocessing.text import Tokenizer
 from tensorflow.keras.preprocessing.sequence import pad_sequences
 # Load dataset
 dataset = load_dataset("Cosmos-AI/Cosmos-dataset")
@@ -18,16 +21,40 @@ dataset_df = pd.DataFrame(dataset['train'])  # Assuming 'train' split contains b
 questions = dataset_df['Question'].astype(str).tolist()
 answers = dataset_df['Answer'].astype(str).tolist()
-# Load tokenizer
-tokenizer = Tokenizer()
 tokenizer.fit_on_texts(questions + answers)
 word_index = tokenizer.word_index
-# Load trained model
-model = load_model("conversation_model.h5")
-# Define max_sequence_length
-max_sequence_length = 31  # do not change!!!
 # Function to generate response
 def generate_response(input_text):
@@ -52,14 +79,9 @@ def generate_response(input_text):
     return response.strip()
-# Define Gradio interface
-iface = gr.Interface(
-    fn=generate_response,
-    inputs="text",
-    outputs="text",
-    title="Conversation Model",
-    description="Enter your message and get a response from the conversational model."
-)
-# Launch the interface
-iface.launch()

 import os
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 from datasets import load_dataset
+import pandas as pd
+import numpy as np
+from tensorflow.keras.models import Sequential, load_model
+from tensorflow.keras.layers import Embedding, LSTM, Dense
 from tensorflow.keras.preprocessing.text import Tokenizer
 from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.losses import sparse_categorical_crossentropy
+import gradio as gr
 # Load dataset
 dataset = load_dataset("Cosmos-AI/Cosmos-dataset")
 questions = dataset_df['Question'].astype(str).tolist()
 answers = dataset_df['Answer'].astype(str).tolist()
+# Tokenize input data
+tokenizer = Tokenizer(lower=False, oov_token="<unk>", filters='\t\n')
 tokenizer.fit_on_texts(questions + answers)
 word_index = tokenizer.word_index
+# Convert text sequences to numerical sequences
+question_sequences = tokenizer.texts_to_sequences(questions)
+answer_sequences = tokenizer.texts_to_sequences(answers)
+# Pad sequences to ensure uniform length
+max_sequence_length = max(len(seq) for seq in question_sequences + answer_sequences)
+print("MAX SEQUENCE LENGTH: " + str(max_sequence_length))
+question_sequences = pad_sequences(question_sequences, maxlen=max_sequence_length, padding='post')
+answer_sequences = pad_sequences(answer_sequences, maxlen=max_sequence_length, padding='post')
+# Convert target sequences to one-hot encoding
+num_words = len(word_index) + 1
+one_hot_answers = np.zeros((len(answer_sequences), max_sequence_length, num_words), dtype=np.float32)
+for i, sequence in enumerate(answer_sequences):
+    for t, index in enumerate(sequence):
+        one_hot_answers[i, t, index] = 1
+# Define model
+model = Sequential([
+    Embedding(len(word_index) + 1, 64, input_length=max_sequence_length),
+    LSTM(64, return_sequences=True),
+    Dense(len(word_index) + 1, activation='softmax')
+])
+# Compile model with correct loss function
+model.compile(loss=sparse_categorical_crossentropy, optimizer='adam', metrics=['accuracy'])
+# Train model
+model.fit(question_sequences, answer_sequences, epochs=1000, batch_size=32, steps_per_epoch=1)
 # Function to generate response
 def generate_response(input_text):
     return response.strip()
+# Interface
+input_text = gr.inputs.Textbox(label="Input Text")
+output_text = gr.outputs.Textbox(label="Output Text")
+gr.Interface(fn=generate_response, inputs=input_text, outputs=output_text, title="Conversation Model", description="Enter your question and get a response.").launch()