import json import backoff import openai # Ollama ollama_choices = [ "mistral-nemo", "llama3.1", "qwen2.5:32b" ] # hyperbolic hyperbolic_choices = [ "Qwen/Qwen2.5-72B-Instruct", "meta-llama/Meta-Llama-3.1-70B-Instruct", ] allchoices = [ "Qwen/Qwen2.5-72B-Instruct", "deepseek-ai/DeepSeek-V2.5", "claude-3-5-sonnet-20240620", "gpt-4o-2024-05-13", "deepseek-coder-v2-0724", "llama3.1-405b", # Anthropic Claude models via Amazon Bedrock "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", "bedrock/anthropic.claude-3-haiku-20240307-v1:0", "bedrock/anthropic.claude-3-opus-20240229-v1:0", ] for item in ollama_choices: allchoices.append("ollama/" + item) for item in hyperbolic_choices: allchoices.append("hyperbolic/" + item) # Get N responses from a single message, used for ensembling. @backoff.on_exception(backoff.expo, (openai.RateLimitError, openai.APITimeoutError)) def get_batch_responses_from_llm( msg, client, model, system_message, print_debug=False, msg_history=None, temperature=0.75, n_responses=1, ): if msg_history is None: msg_history = [] if model in [ "gpt-4o-2024-05-13", "gpt-4o-mini-2024-07-18", "gpt-4o-2024-08-06", "Qwen/Qwen2.5-72B-Instruct" ]: new_msg_history = msg_history + [{"role": "user", "content": msg}] response = client.chat.completions.create( model=model, messages=[ {"role": "system", "content": system_message}, *new_msg_history, ], temperature=temperature, max_tokens=3000, n=n_responses, stop=None, seed=0, ) content = [r.message.content for r in response.choices] new_msg_history = [ new_msg_history + [{"role": "assistant", "content": c}] for c in content ] elif model == "deepseek-coder-v2-0724": new_msg_history = msg_history + [{"role": "user", "content": msg}] response = client.chat.completions.create( model="deepseek-coder", messages=[ {"role": "system", "content": system_message}, *new_msg_history, ], temperature=temperature, max_tokens=3000, n=n_responses, stop=None, ) content = [r.message.content for r in response.choices] new_msg_history = [ new_msg_history + [{"role": "assistant", "content": c}] for c in content ] # ------------------------------------------------------------------------------------------------------ elif model == "Qwen/Qwen2.5-72B-Instruct": new_msg_history = msg_history + [{"role": "user", "content": msg}] response = client.chat.completions.create( model="Qwen/Qwen2.5-72B-Instruct", messages=[ {"role": "system", "content": system_message}, *new_msg_history, ], temperature=temperature, max_tokens=3000, n=n_responses, stop=None, ) content = [r.message.content for r in response.choices] new_msg_history = [ new_msg_history + [{"role": "assistant", "content": c}] for c in content ] # elif model in hyperbolic_choices: # content, new_msg_history = [], [] # for i in range(n_responses): # print(f"Getting {i+1}/{n_responses} response from {model}") # c, hist = get_response_from_llm( # msg, # client, # model, # system_message, # print_debug=False, # msg_history=None, # temperature=temperature, # ) # content.append(c) # new_msg_history.append(hist) # ------------------------------------------------------------------------------------------------------ elif model == "llama-3-1-405b-instruct": new_msg_history = msg_history + [{"role": "user", "content": msg}] response = client.chat.completions.create( model="meta-llama/llama-3.1-405b-instruct", messages=[ {"role": "system", "content": system_message}, *new_msg_history, ], temperature=temperature, max_tokens=3000, n=n_responses, stop=None, ) content = [r.message.content for r in response.choices] new_msg_history = [ new_msg_history + [{"role": "assistant", "content": c}] for c in content ] elif model == "claude-3-5-sonnet-20240620": content, new_msg_history = [], [] for _ in range(n_responses): c, hist = get_response_from_llm( msg, client, model, system_message, print_debug=False, msg_history=None, temperature=temperature, ) content.append(c) new_msg_history.append(hist) # ollama models elif model in ollama_choices: content, new_msg_history = [], [] for i in range(n_responses): print(f"Getting {i+1}/{n_responses} response from {model}") c, hist = get_response_from_llm( msg, client, model, system_message, print_debug=False, msg_history=None, temperature=temperature, ) content.append(c) new_msg_history.append(hist) else: raise ValueError(f"Model {model} not supported.") if print_debug: # Just print the first one. print() print("*" * 20 + " LLM START " + "*" * 20) for j, msg in enumerate(new_msg_history[0]): print(f'{j}, {msg["role"]}: {msg["content"]}') print(content) print("*" * 21 + " LLM END " + "*" * 21) print() return content, new_msg_history @backoff.on_exception(backoff.expo, (openai.RateLimitError, openai.APITimeoutError)) def get_response_from_llm( msg, client, model, system_message, print_debug=False, msg_history=None, temperature=0.75, ): if msg_history is None: msg_history = [] if model == "claude-3-5-sonnet-20240620": new_msg_history = msg_history + [ { "role": "user", "content": [ { "type": "text", "text": msg, } ], } ] response = client.messages.create( model="claude-3-5-sonnet-20240620", max_tokens=3000, temperature=temperature, system=system_message, messages=new_msg_history, ) content = response.content[0].text new_msg_history = new_msg_history + [ { "role": "assistant", "content": [ { "type": "text", "text": content, } ], } ] # ------------------------------------------------------------------------------------------------------ elif model in [ "gpt-4o-2024-05-13", "gpt-4o-mini-2024-07-18", "gpt-4o-2024-08-06", "Qwen/Qwen2.5-72B-Instruct" ]: new_msg_history = msg_history + [{"role": "user", "content": msg}] response = client.chat.completions.create( model=model, messages=[ {"role": "system", "content": system_message}, *new_msg_history, ], temperature=temperature, max_tokens=3000, n=1, stop=None, seed=0, ) content = response.choices[0].message.content new_msg_history = new_msg_history + [{"role": "assistant", "content": content}] # ------------------------------------------------------------------------------------------------------ elif model in ["meta-llama/llama-3.1-405b-instruct", "llama-3-1-405b-instruct"]: new_msg_history = msg_history + [{"role": "user", "content": msg}] response = client.chat.completions.create( model="meta-llama/llama-3.1-405b-instruct", messages=[ {"role": "system", "content": system_message}, *new_msg_history, ], temperature=temperature, max_tokens=3000, n=1, stop=None, ) content = response.choices[0].message.content new_msg_history = new_msg_history + [{"role": "assistant", "content": content}] elif model in ollama_choices: new_msg_history = msg_history + [{"role": "user", "content": msg}] response = client.chat.completions.create( model=model, messages=[ {"role": "system", "content": system_message}, *new_msg_history, ], temperature=temperature, max_tokens=6000, n=1, stop=None, seed=0, ) content = response.choices[0].message.content # print("\nget_response_from_llm\n") # print(content) new_msg_history = new_msg_history + [{"role": "assistant", "content": content}] else: raise ValueError(f"Model {model} not supported.") if print_debug: print() print("*" * 20 + " LLM START " + "*" * 20) for j, msg in enumerate(new_msg_history): print(f'{j}, {msg["role"]}: {msg["content"]}') print(content) print("*" * 21 + " LLM END " + "*" * 21) print() return content, new_msg_history def llm_json_auto_correct(system_prompt: str, user_prompt: str) -> str: import os client = openai.OpenAI(api_key=os.environ["OPENAI_API_KEY"], base_url="https://api.hyperbolic.xyz/v1") response = client.chat.completions.create( model="Qwen/Qwen2.5-72B-Instruct", temperature=0, messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, ], ) return response.choices[0].message.content def extract_json_between_markers(llm_output): json_start_marker = "```json" json_end_marker = "```" # Find the start and end indices of the JSON string start_index = llm_output.find(json_start_marker) if start_index != -1: start_index += len(json_start_marker) # Move past the marker end_index = llm_output.find(json_end_marker, start_index) else: return None # JSON markers not found if end_index == -1: return None # End marker not found # Extract the JSON string json_string = llm_output[start_index:end_index].strip() # print(json_string) try: parsed_json = json.loads(json_string) return parsed_json except json.JSONDecodeError: return None # Invalid JSON format