import uvicorn
from fastapi import FastAPI, HTTPException, Request
import os
import torch
import optimum
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from fastapi.responses import HTMLResponse
from datetime import datetime
import random
import string
import json

# Set environment variables for GPU usage and memory allocation
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
torch.cuda.empty_cache()
torch.cuda.set_per_process_memory_fraction(0.8)  # Adjust the fraction as needed

# Initialize FastAPI application
app = FastAPI()
conversations = {}
device = "cuda"  # the device to load the model onto

system_message = '''
Ivana, a vivacious 20-year-old petite redhead model and avid League of Legends gamer from the USA, invites you to join her ig, showcasing her vibrant personality and style whether day gaming or playfully dressed for nighttime gaming sessions.

Ivana is a beautiful woman speaking with a man she matched on a dating app to have him follow her on Instagram. She wants to be flirty with the man and get to know him better, building a connection. She wants to find out things like where the guy is from, how old he is, and what he is doing rn. If he is not doing much and he is in a private space, she wants to seduce him by doing some dirty talk.

Guy - Hiii
Ivana - Hi:) How are you?
Guy - All good, where are you from?
Ivana - from the USA and you?
Guy - I am from Texas, USA hahah, so why are you in this app?
Ivana - Kinda looking for some fun hahah. What are you doing rn?
Guy - hahah not much alone in my room
Ivana - I am also kinda bored but I got an idea. wanna have some fun?
Guy - sure hahah how do you look btw?
Ivana - I am cute hahah add me on ig at ivana_baddiex

- Chatting Usual Routine
1. always ask an open-ended question to continue the conversation.
2. ask him 1 generic question (how old are you, where are you from)
3. ask him what he is doing right now
4. if he is bored, propose him the idea of doing something interesting.
5. if he is up for it, send him the Instagram

Very important Rules to reply
- try to reply using the same amount of characters of the question
- don't use any emojis in your replies but you can do like hahah or :) instead

Objection Handling and Common Questions Replies
- Lets chat here → you say hahah I have too many chats here I am thinking to delete the app lets move to ig if you are serious
- are you going to ask me for money? → you say wtf I am not that kind of girl! please be a gentleman!
- how are you? → you say I just came back from the gym about to take a shower bored on the bed
- what are you doing? → you say I just came back from the gym about to take a shower bored on the bed
- can I have a picture? → not here lets talk on ig so I can see you too ahhaha
- if he is at work → hahah ok let me not bother you btw let's keep in touch this is my ig
- if he tells you something like "added" "Just followed you" → okiiii hahah
- if he tells you something you don't understand for example "*****" → hi there sorry went offline for a while hahah
- never say the word Instagram directly but use things like ig instead
- if the guy says he's from a particular city → you say Oh wow I am also from there! we should do something hahah
- if the guy triggers your profanity filters or starts to talk dirty say → hey lets talk on ig I am not comfortable here hahah
- if the guy does not want to add you on ig → no worries hahah wanna call me here?
- don't use any emojis in your replies but you can do like hahah or :) instead
- try to reply using the same amount of characters of the question
- if the guy says you are far → hahah I travel a lot though. let's talk on ig? this is my handle.
'''
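
# The response handlers below each rebuild the same ChatML-style prompt inline.
# A minimal sketch of that format as a standalone helper (hypothetical:
# `build_chatml_prompt` is not referenced anywhere else in this file), shown only
# to make the template explicit in one place:
def build_chatml_prompt(system: str, user: str) -> str:
    """Assemble a single-turn ChatML prompt: system block, user block, open assistant block."""
    return (
        f"<|im_start|>system\n{system}<|im_end|>\n"
        f"<|im_start|>user\n{user}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )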


def hermes_model():
    """Load the AWQ-quantized CapybaraHermes-2.5 Mistral 7B model and its tokenizer."""
    tokenizer = AutoTokenizer.from_pretrained("TheBloke/CapybaraHermes-2.5-Mistral-7B-AWQ")
    model = AutoModelForCausalLM.from_pretrained(
        "TheBloke/CapybaraHermes-2.5-Mistral-7B-AWQ",
        low_cpu_mem_usage=True,
        device_map="auto",
    )
    return model, tokenizer


model, tokenizer = hermes_model()


def generate_id(length=5):
    """
    Generates a random alphanumeric ID.

    Args:
        length (int): The length of the ID.

    Returns:
        str: A random alphanumeric ID.
    """
    return ''.join(random.choices(string.ascii_letters + string.digits, k=length))


def generate_thread_id():
    """
    Generates a unique thread ID for each conversation.

    Returns:
        str: A unique thread ID.
    """
    return generate_id()


def generate_message_id():
    """
    Generates a random alphanumeric message ID.

    Returns:
        str: A random alphanumeric message ID.
    """
    return generate_id()


def chat_response(msg_prompt: str) -> dict:
    """
    Generates a response from the model given a prompt.

    Args:
        msg_prompt (str): The user's message prompt.

    Returns:
        dict: A dictionary containing the user's message prompt and the model's response.
    """
    generation_params = {
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.95,
        "top_k": 40,
        "max_new_tokens": 512,
        "repetition_penalty": 1.1,
    }
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, **generation_params)
    try:
        prompt_template = f'''<|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{msg_prompt}<|im_end|>
<|im_start|>assistant
'''
        pipe_output = pipe(prompt_template)[0]['generated_text']

        # Separate the user's prompt from the assistant's response
        response_lines = pipe_output.split('\n')
        user_prompt = response_lines[0].strip()
        assistant_response = response_lines[-1].strip()

        return {"user": msg_prompt, "assistant": assistant_response}
    except Exception as e:
        return {"error": str(e)}
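

# The handlers in this file recover the reply by splitting the generated text on newlines
# and keeping the last line, which silently truncates multi-line replies. A sketch of a more
# robust alternative (hypothetical: `extract_assistant_reply` is not wired into the handlers),
# assuming the pipeline echoes the prompt back at the start of `generated_text`:
def extract_assistant_reply(generated_text: str, prompt_template: str) -> str:
    """Return only the text the model appended after the prompt, up to the first <|im_end|>."""
    reply = generated_text
    if reply.startswith(prompt_template):
        reply = reply[len(prompt_template):]
    return reply.split("<|im_end|>")[0].strip()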


def thread_response(msg_history: str, msg_prompt: str) -> dict:
    """
    Generates a response from the model using the system prompt defined above.

    Args:
        msg_history (str): Messages from earlier in the conversation, used as recall context.
        msg_prompt (str): The user's message prompt.

    Returns:
        dict: A dictionary containing the user's message prompt and the model's response.
    """
    # Validate input parameters
    if not msg_prompt:
        raise ValueError("Message prompt cannot be empty.")

    generation_params = {
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.95,
        "top_k": 40,
        "max_new_tokens": 512,
        "repetition_penalty": 1.1,
    }
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, **generation_params)

    instructions = f'''
    In case you are asked about something discussed on previous days or at earlier times, use this as a reference to recall it: {msg_history}. Do not bring up everything, just the major key points. If nothing is mentioned about the past conversation, ignore this.
    '''

    if msg_history:
        prompt_template = f'''
<|im_start|>system{system_message}{instructions}<|im_end|>
<|im_start|>user
{msg_prompt}<|im_end|>
<|im_start|>assistant
'''
    else:
        prompt_template = f'''
<|im_start|>system{system_message}<|im_end|>
<|im_start|>user
{msg_prompt}<|im_end|>
<|im_start|>assistant
'''

    try:
        pipe_output = pipe(prompt_template)[0]['generated_text']

        # Separate the user's prompt from the assistant's response
        response_lines = pipe_output.split('\n')
        user_prompt = response_lines[0].strip()
        assistant_response = response_lines[-1].strip()

        return {"user": msg_prompt, "assistant": assistant_response}
    except Exception as e:
        return {"error": str(e)}


def chat_thread_response(ai_persona: str, msg_history: str, msg_prompt: str) -> dict:
    """
    Generates a response based on the provided persona description and the user's message prompt.

    Args:
        ai_persona (str): The persona description used as the system prompt.
        msg_history (str): Messages from earlier in the conversation, used as recall context.
        msg_prompt (str): The user's message prompt.

    Returns:
        dict: A dictionary containing the user's msg_prompt and the model's response.
    """
    # Validate input parameters
    if not msg_prompt:
        raise ValueError("Message prompt cannot be empty.")

    generation_params = {
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.95,
        "top_k": 40,
        "max_new_tokens": 512,
        "repetition_penalty": 1.1,
    }
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, **generation_params)

    instructions = f'''
    In case you are asked about something discussed on previous days or at earlier times, use this as a reference to recall it: {msg_history}. Do not bring up everything, just the major key points. If nothing is mentioned about the past conversation, ignore this.
    '''

    if msg_history:
        prompt_template = f'''
<|im_start|>system{ai_persona}{instructions}<|im_end|>
<|im_start|>user
{msg_prompt}<|im_end|>
<|im_start|>assistant
'''
    else:
        prompt_template = f'''
<|im_start|>system{ai_persona}<|im_end|>
<|im_start|>user
{msg_prompt}<|im_end|>
<|im_start|>assistant
'''

    try:
        # Generate response using the pipeline
        pipe_output = pipe(prompt_template)[0]['generated_text']

        # Separate the user's prompt from the assistant's response
        response_lines = pipe_output.split('\n')
        user_prompt = response_lines[0].strip()
        assistant_response = response_lines[-1].strip()

        # Return user prompt and assistant response
        return {"user": msg_prompt, "assistant": assistant_response}
    except Exception as e:
        # Return error message if an exception occurs
        return {"error": str(e)}


@app.get("/", tags=["Home"])
async def api_home():
    """
    Home endpoint of the API.

    Returns:
        HTMLResponse: An HTML welcome message.
    """
    html_content = """