Spaces:
Sleeping
Sleeping
File size: 6,054 Bytes
50f4808 db75f2c 50f4808 db75f2c ddecd6a 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 db75f2c 15939d8 db75f2c 50f4808 db75f2c 50f4808 db75f2c 50f4808 db75f2c 15939d8 db75f2c 50f4808 db75f2c 50f4808 db75f2c 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 ddecd6a db75f2c 50f4808 ddecd6a db75f2c 50f4808 59eab61 ddecd6a 59eab61 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 9825b5e 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 ddecd6a 50f4808 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
import pandas as pd
import os
import gradio as gr
import threading
import time
from groq import Groq
# Initialize Groq client (presumably reads GROQ_API_KEY from the environment — TODO confirm)
client = Groq()
# Constants
MAX_SIZE = 1.1 * 1024 * 1024 * 1024 # 1.1GB in bytes — rollover threshold for each CSV file
DATA_DIRECTORY = 'data'  # directory where generated CSV files are written
UPDATE_INTERVAL = 1 # Update interval in seconds between generation rounds
# Ensure the data directory exists
os.makedirs(DATA_DIRECTORY, exist_ok=True)
# Mutable module-level state, shared with the background generator thread:
file_index = 1  # numeric suffix of the CSV file currently being appended to
current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
file_paths = [current_file]  # every CSV file created so far (feeds the UI dropdown)
combined_tokens = 0  # running count across all prompts/responses (whitespace word count, not true tokens)
def get_file_size(filename):
    """Return the on-disk size of *filename* in bytes, or 0 if it is not a regular file."""
    if not os.path.isfile(filename):
        return 0
    return os.path.getsize(filename)
def _stream_chat(user_content, max_tokens):
    """Send one user message to the Groq model and stream back the reply.

    Returns a ``(text, word_count)`` tuple. The count is a whitespace word
    count (``len(content.split())``), not a true tokenizer count — it is the
    same proxy the UI reports as "Combined Tokens".
    """
    completion = client.chat.completions.create(
        model="llama3-groq-70b-8192-tool-use-preview",
        messages=[
            {
                "role": "user",
                "content": user_content
            }
        ],
        temperature=1,
        max_tokens=max_tokens,
        top_p=1,
        stream=True,
        stop=None,
    )
    text = ""
    words = 0
    for chunk in completion:
        content = chunk.choices[0].delta.content
        if content:
            text += content
            words += len(content.split())
    return text, words
# Data generation and saving function
def generate_and_save_data():
    """Continuously generate (prompt, response) pairs and append them to CSV files.

    Intended to run forever in a daemon thread: each iteration asks the model
    to invent a user-style prompt, answers that prompt, appends the pair to the
    current CSV file, and rolls over to a new file once MAX_SIZE is reached.
    Mutates the module globals declared below.
    """
    global file_index, current_file, file_paths, combined_tokens
    # Create the initial file with headers if it doesn't exist
    if not os.path.isfile(current_file):
        pd.DataFrame(columns=["prompt", "response"]).to_csv(current_file, index=False)
    while True:
        try:
            # Step 1: ask the model to invent a realistic user prompt.
            prompt, prompt_tokens = _stream_chat(
                "give me a single prompt to prompt an ai model, simulating what users could want from you. ensure that it is diverse and high quality. for each, choose a random writing style (though it has to be a common one), random length and random clarity of the prompt. ensure that it is a single prompt, and just the prompt itself, nothing else. eg, don't close the prompt in quotation marks or say Here is a single prompt that meets your requirements or anything similar to that",
                1024,
            )
            # Step 2: answer the generated prompt with a larger output budget.
            response, response_tokens = _stream_chat(prompt, 5000)
            # Update the combined token count
            combined_tokens += (prompt_tokens + response_tokens)
            # Print the generated prompt and the response
            print("Generated prompt:", prompt)
            print("Response to the generated prompt:", response)
            # Create a DataFrame with the prompt and response
            data = pd.DataFrame({"prompt": [prompt], "response": [response]})
            # Roll over to a fresh file once the current one hits the size cap.
            # NOTE: the check runs *before* the write, so a file may exceed
            # MAX_SIZE by at most one row.
            if get_file_size(current_file) >= MAX_SIZE:
                file_index += 1
                current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
                file_paths.append(current_file)
                # Create the new file with headers
                with open(current_file, 'w') as f:
                    data.to_csv(f, header=True, index=False)
            else:
                # Append data to the current file (headers already present)
                with open(current_file, 'a') as f:
                    data.to_csv(f, header=False, index=False)
            # Wait for the next update interval
            time.sleep(UPDATE_INTERVAL)
        except Exception as e:
            # Top-level boundary: keep the generator loop alive through any
            # failure (network errors, rate limits) and retry after a pause.
            print(f"An error occurred: {e}. Retrying in 5 seconds...")
            time.sleep(5)
def get_available_files():
    """Return the tracked data files that currently exist on disk."""
    return list(filter(os.path.isfile, file_paths))
def update_file_list():
    """Refresh the dropdown choices with the files that exist right now."""
    available = get_available_files()
    return gr.update(choices=available)
def update_token_count():
    """Expose the running combined word-count total for the UI."""
    return combined_tokens
def display_file_content(selected_file):
    """Load *selected_file* as a DataFrame; empty DataFrame when nothing is chosen."""
    if not selected_file:
        return pd.DataFrame()
    return pd.read_csv(selected_file)
# Run the generator loop in the background. daemon=True (passed to the
# constructor rather than set afterwards) means the thread dies with the
# main process instead of blocking interpreter shutdown.
thread = threading.Thread(target=generate_and_save_data, daemon=True)
thread.start()
# Create Gradio interface.
# NOTE: component creation order inside the Blocks context determines the
# on-page layout, so statement order here is significant.
with gr.Blocks() as app:
    gr.Markdown("## AI Prompt and Response Generator")
    gr.Markdown("This app continuously generates AI prompts and responses, and writes them to CSV files.")
    # Dropdown listing the CSV files produced so far by the background thread.
    file_selector = gr.Dropdown(label="Select a data file to view and download", choices=get_available_files())
    file_viewer = gr.DataFrame(label="CSV File Content")
    download_button = gr.File(label="Download Selected File")
    def download_file(selected_file):
        # Identity mapping: gr.File serves whatever path it is handed.
        return selected_file
    refresh_button = gr.Button("Refresh File List")
    # Re-scan the tracked file list on demand (new files appear after size rollover).
    refresh_button.click(update_file_list, outputs=file_selector)
    # Selecting a file both renders it in the table and arms the download widget.
    file_selector.change(display_file_content, inputs=file_selector, outputs=file_viewer)
    file_selector.change(download_file, inputs=file_selector, outputs=download_button)
    # Snapshot of the running word-count total taken once at page build time.
    token_display = gr.Textbox(label="Combined Tokens", value=str(update_token_count()), interactive=False)
    def update_token_display():
        # Re-read the module-level counter maintained by the generator thread.
        return str(update_token_count())
    # Manually refresh the token count display (no automatic polling).
    token_refresh = gr.Button("Refresh Token Count")
    token_refresh.click(update_token_display, outputs=token_display)
app.launch()
|